Background

Open-Meteo maintains an API for historical weather that allows for non-commercial usage of historical weather data maintained by the website.

This file runs exploratory analysis on some of the historical weather data.

Exploratory Analysis

The exploration process uses tidyverse and several generic custom functions:

library(tidyverse) # tidyverse functionality is included throughout
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
source("./Generic_Added_Utility_Functions_202105_v001.R") # Basic functions

A sample of data for 365 days has been downloaded as a CSV. The downloaded data has three separate files included in a single tab, separated by a blank row. The first file is location data, the second file is hourly data, and the third file is daily data. For initial exploration, parameters specific to this file are used:

omFileLoc <- "./RInputFiles/openmeteo_20230612_example.csv"

# Location data
omLocation <- readr::read_csv(omFileLoc, n_max=1, skip=0) 
## Rows: 1 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): timezone, timezone_abbreviation
## dbl (4): latitude, longitude, elevation, utc_offset_seconds
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
omLocation
## # A tibble: 1 × 6
##   latitude longitude elevation utc_offset_seconds timezone        timezone_abb…¹
##      <dbl>     <dbl>     <dbl>              <dbl> <chr>           <chr>         
## 1     41.8     -87.6       179             -18000 America/Chicago CDT           
## # … with abbreviated variable name ¹​timezone_abbreviation
# Hourly data 
# Elements: time, 2m temp (C), 2m dew point (C), 2m relative humidity (%), precip (mm), rain (mm), and snow (cm)
omHourlyRaw <- readr::read_csv(omFileLoc, n_max=8760, skip=3) 
## Rows: 8760 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): temperature_2m (°C), relativehumidity_2m (%), dewpoint_2m (°C), pr...
## dttm (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
omHourlyProcess <- omHourlyRaw %>%
    purrr::set_names(c("time", "temp2m_C", "relH2m", "dew2m_C", "precip_mm", "rain_mm", "snow_cm")) %>% 
    mutate(date=date(time))
omHourlyProcess
## # A tibble: 8,760 × 8
##    time                temp2…¹ relH2m dew2m_C preci…² rain_mm snow_cm date      
##    <dttm>                <dbl>  <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <date>    
##  1 2022-06-08 00:00:00    13.4     91    11.9     0       0         0 2022-06-08
##  2 2022-06-08 01:00:00    13.4     91    12       0       0         0 2022-06-08
##  3 2022-06-08 02:00:00    13.8     87    11.8     0       0         0 2022-06-08
##  4 2022-06-08 03:00:00    13.8     87    11.7     0       0         0 2022-06-08
##  5 2022-06-08 04:00:00    14       85    11.6     0       0         0 2022-06-08
##  6 2022-06-08 05:00:00    14.4     82    11.3     0       0         0 2022-06-08
##  7 2022-06-08 06:00:00    14.8     79    11.1     0       0         0 2022-06-08
##  8 2022-06-08 07:00:00    15.1     77    11.1     0.1     0.1       0 2022-06-08
##  9 2022-06-08 08:00:00    15.7     75    11.2     0       0         0 2022-06-08
## 10 2022-06-08 09:00:00    16.4     72    11.3     0       0         0 2022-06-08
## # … with 8,750 more rows, and abbreviated variable names ¹​temp2m_C, ²​precip_mm
summary(omHourlyProcess)
##       time                        temp2m_C          relH2m      
##  Min.   :2022-06-08 00:00:00   Min.   :-20.60   Min.   : 32.00  
##  1st Qu.:2022-09-07 05:45:00   1st Qu.:  2.80   1st Qu.: 62.00  
##  Median :2022-12-07 11:30:00   Median : 10.30   Median : 72.00  
##  Mean   :2022-12-07 11:30:00   Mean   : 10.81   Mean   : 72.38  
##  3rd Qu.:2023-03-08 17:15:00   3rd Qu.: 19.80   3rd Qu.: 83.00  
##  Max.   :2023-06-07 23:00:00   Max.   : 31.50   Max.   :100.00  
##                                NA's   :53       NA's   :53      
##     dew2m_C          precip_mm           rain_mm            snow_cm       
##  Min.   :-24.300   Min.   : 0.00000   Min.   : 0.00000   Min.   :0.00000  
##  1st Qu.: -1.400   1st Qu.: 0.00000   1st Qu.: 0.00000   1st Qu.:0.00000  
##  Median :  5.500   Median : 0.00000   Median : 0.00000   Median :0.00000  
##  Mean   :  5.792   Mean   : 0.09986   Mean   : 0.09167   Mean   :0.00573  
##  3rd Qu.: 14.700   3rd Qu.: 0.00000   3rd Qu.: 0.00000   3rd Qu.:0.00000  
##  Max.   : 24.200   Max.   :11.10000   Max.   :11.10000   Max.   :1.26000  
##  NA's   :53        NA's   :53         NA's   :53         NA's   :53       
##       date           
##  Min.   :2022-06-08  
##  1st Qu.:2022-09-07  
##  Median :2022-12-07  
##  Mean   :2022-12-07  
##  3rd Qu.:2023-03-08  
##  Max.   :2023-06-07  
## 
# Daily data 
# Elements: date, sum of precip (mm), sum of rain (mm), and sum of snow (cm)
omDailyRaw <- readr::read_csv(omFileLoc, n_max=365, skip=8765) 
## Rows: 365 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): precipitation_sum (mm), rain_sum (mm), snowfall_sum (cm)
## date (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
omDailyProcess <- omDailyRaw %>%
    purrr::set_names(c("date", "precip_mm", "rain_mm", "snow_cm"))
omDailyProcess
## # A tibble: 365 × 4
##    date       precip_mm rain_mm snow_cm
##    <date>         <dbl>   <dbl>   <dbl>
##  1 2022-06-08      16      16         0
##  2 2022-06-09       0       0         0
##  3 2022-06-10       0.6     0.6       0
##  4 2022-06-11       0       0         0
##  5 2022-06-12       1.3     1.3       0
##  6 2022-06-13       2.6     2.6       0
##  7 2022-06-14       0       0         0
##  8 2022-06-15       0       0         0
##  9 2022-06-16       9.5     9.5       0
## 10 2022-06-17       0       0         0
## # … with 355 more rows
summary(omDailyProcess)
##       date              precip_mm         rain_mm          snow_cm      
##  Min.   :2022-06-08   Min.   : 0.000   Min.   : 0.000   Min.   :0.0000  
##  1st Qu.:2022-09-07   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.:0.0000  
##  Median :2022-12-07   Median : 0.000   Median : 0.000   Median :0.0000  
##  Mean   :2022-12-07   Mean   : 2.402   Mean   : 2.205   Mean   :0.1379  
##  3rd Qu.:2023-03-08   3rd Qu.: 1.875   3rd Qu.: 1.300   3rd Qu.:0.0000  
##  Max.   :2023-06-07   Max.   :40.000   Max.   :40.000   Max.   :6.6500  
##                       NA's   :3        NA's   :3        NA's   :3

A function is written to read a portion of a CSV file:

partialCSVRead <- function(loc, firstRow=1L, lastRow=+Inf, col_names=TRUE, ...) {
    
    # FUNCTION arguments
    # loc: file location
    # firstRow: first row that is relevant to the partial file read (whether header line or data line)
    # last Row: last row that is relevant to the partial file read (+Inf means read until last line of file)
    # col_names: the col_names parameter passed to readr::read_csv
    #            TRUE means header=TRUE (get column names from file, read data starting on next line)
    #            FALSE means header=FALSE (auto-generate column names, read data starting on first line)
    #            character vector means use these as column names (read data starting on first line)
    # ...: additional arguments passed to read_csv

    # Read the file and return
    # skip: rows to be skipped are all those prior to firstRow
    # n_max: maximum rows read are lastRow-firstRow, with an additional data row when col_names is not TRUE
    readr::read_csv(loc, 
                    skip=firstRow-1, 
                    n_max=lastRow-firstRow+ifelse(isTRUE(col_names), 0, 1), 
                    ...
                    )
    
}

# Double check that data are the same
partialCSVRead(omFileLoc, firstRow=1L, lastRow=2L) %>% all.equal(omLocation)
## Rows: 1 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): timezone, timezone_abbreviation
## dbl (4): latitude, longitude, elevation, utc_offset_seconds
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] TRUE
partialCSVRead(omFileLoc, firstRow=4L, lastRow=8764L) %>% all.equal(omHourlyRaw)
## Rows: 8760 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): temperature_2m (°C), relativehumidity_2m (%), dewpoint_2m (°C), pr...
## dttm (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] TRUE
partialCSVRead(omFileLoc, firstRow=8766L, lastRow=+Inf) %>% all.equal(omDailyRaw)
## Rows: 365 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): precipitation_sum (mm), rain_sum (mm), snowfall_sum (cm)
## date (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] TRUE

The blank lines are assessed, allowing for all tables to be read at the same time:

# Get the break points for gaps in a vector (e.g., 0, 3, 5:8, 20 has break points 0, 3, 5, 20 and 0, 3, 8, 30)
vecGaps <- function(x, addElements=c(), sortUnique=TRUE) {
    
    if(length(addElements)>0) x <- c(addElements, x)
    if(isTRUE(sortUnique)) x <- unique(sort(x))
    list("starts"=c(x[is.na(lag(x)) | x-lag(x)>1], +Inf), 
         "ends"=x[is.na(lead(x)) | lead(x)-x>1]
         )
    
}

vecGaps(c(3, 5:8, 20), addElements=0)
## $starts
## [1]   0   3   5  20 Inf
## 
## $ends
## [1]  0  3  8 20
# Find the break points in a single file
flatFileGaps <- function(loc) {

    which(stringr::str_length(readLines(loc))==0) %>% vecGaps(addElements=0)
    
}

flatFileGaps(omFileLoc)
## $starts
## [1]    0    3 8765  Inf
## 
## $ends
## [1]    0    3 8765
# Read all relevant data as CSV with header
readMultiCSV <- function(loc, col_names=TRUE, ...) {

    gaps <- flatFileGaps(loc)
    
    lapply(seq_along(gaps$ends), 
           FUN=function(x) partialCSVRead(loc, 
                                          firstRow=gaps$ends[x]+1, 
                                          lastRow=gaps$starts[x+1]-1, 
                                          col_names=col_names, 
                                          ...
                                          )
           )
    
}

tstMultiCSV <- readMultiCSV(omFileLoc)
## Rows: 1 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): timezone, timezone_abbreviation
## dbl (4): latitude, longitude, elevation, utc_offset_seconds
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 8760 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (6): temperature_2m (°C), relativehumidity_2m (%), dewpoint_2m (°C), pr...
## dttm (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 365 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): precipitation_sum (mm), rain_sum (mm), snowfall_sum (cm)
## date (1): time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all.equal(tstMultiCSV[[1]], omLocation)
## [1] TRUE
all.equal(tstMultiCSV[[2]], omHourlyRaw)
## [1] TRUE
all.equal(tstMultiCSV[[3]], omDailyRaw)
## [1] TRUE

Data can also be downloaded through the Open-Meteo API, returning a JSON file. The data download has been completed off-line to minimize repeated hits against the server. The JSON file can then be read:

# Example download sequence
# download.file("https://archive-api.open-meteo.com/v1/archive?latitude=41.85&longitude=-87.65&start_date=2022-06-01&end_date=2023-06-08&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall&daily=precipitation_sum,rain_sum,snowfall_sum&timezone=America%2FChicago", "tempOM")

# Create hourly data tibble
jsonHourly <- jsonlite::read_json("tempOM", simplifyVector = TRUE)[["hourly"]] %>% 
    tibble::as_tibble() %>% 
    mutate(tm=lubridate::ymd_hm(time), date=date(tm))
jsonHourly
## # A tibble: 8,952 × 9
##    time        tempe…¹ relat…² dewpo…³ preci…⁴  rain snowf…⁵ tm                 
##    <chr>         <dbl>   <int>   <dbl>   <dbl> <dbl>   <dbl> <dttm>             
##  1 2022-06-01…    21        92    19.6     0.1   0.1       0 2022-06-01 00:00:00
##  2 2022-06-01…    20.6      93    19.5     0.3   0.3       0 2022-06-01 01:00:00
##  3 2022-06-01…    21        93    19.8     0     0         0 2022-06-01 02:00:00
##  4 2022-06-01…    20.8      93    19.7     0     0         0 2022-06-01 03:00:00
##  5 2022-06-01…    20.5      93    19.4     0     0         0 2022-06-01 04:00:00
##  6 2022-06-01…    19.8      95    19       0.7   0.7       0 2022-06-01 05:00:00
##  7 2022-06-01…    19.3      97    18.8     1.6   1.6       0 2022-06-01 06:00:00
##  8 2022-06-01…    19        97    18.4     1     1         0 2022-06-01 07:00:00
##  9 2022-06-01…    18.1      92    16.9     0.1   0.1       0 2022-06-01 08:00:00
## 10 2022-06-01…    16.8      87    14.6     0     0         0 2022-06-01 09:00:00
## # … with 8,942 more rows, 1 more variable: date <date>, and abbreviated
## #   variable names ¹​temperature_2m, ²​relativehumidity_2m, ³​dewpoint_2m,
## #   ⁴​precipitation, ⁵​snowfall
# Create daily data tibble
jsonDaily <- jsonlite::read_json("tempOM", simplifyVector = TRUE)[["daily"]] %>% 
    tibble::as_tibble()
jsonDaily
## # A tibble: 373 × 4
##    time       precipitation_sum rain_sum snowfall_sum
##    <chr>                  <dbl>    <dbl>        <dbl>
##  1 2022-06-01               3.8      3.8            0
##  2 2022-06-02               0        0              0
##  3 2022-06-03               0        0              0
##  4 2022-06-04               1.3      1.3            0
##  5 2022-06-05               0.3      0.3            0
##  6 2022-06-06              12.5     12.5            0
##  7 2022-06-07               2        2              0
##  8 2022-06-08              16       16              0
##  9 2022-06-09               0        0              0
## 10 2022-06-10               0.6      0.6            0
## # … with 363 more rows
# Extract other elements
jsonNames <- jsonlite::read_json("tempOM", simplifyVector = TRUE) %>% names
for (jsonName in jsonNames[!(jsonNames %in% c("daily", "hourly", "daily_units", "hourly_units"))]) {
    cat("\n", jsonName, ":", jsonlite::read_json("tempOM", simplifyVector = TRUE)[[jsonName]])
}
## 
##  latitude : 41.8
##  longitude : -87.6
##  generationtime_ms : 2.892971
##  utc_offset_seconds : -18000
##  timezone : America/Chicago
##  timezone_abbreviation : CDT
##  elevation : 179
for (jsonName in jsonNames[jsonNames %in% c("daily_units", "hourly_units")]) {
    cat("\n", jsonName, ":\n")
    print(jsonlite::read_json("tempOM", simplifyVector = TRUE)[[jsonName]] %>% tibble::as_tibble() %>% t())
}
## 
##  hourly_units :
##                     [,1]     
## time                "iso8601"
## temperature_2m      "°C"     
## relativehumidity_2m "%"      
## dewpoint_2m         "°C"     
## precipitation       "mm"     
## rain                "mm"     
## snowfall            "cm"     
## 
##  daily_units :
##                   [,1]     
## time              "iso8601"
## precipitation_sum "mm"     
## rain_sum          "mm"     
## snowfall_sum      "cm"

Daily data read from JSON and CSV are compared:

# Convert variable names in JSON daily data
jsonDailyProcess <- jsonDaily %>%
    colRenamer(vecRename=c("precipitation_sum"="precip_mm", 
                           "rain_sum"="rain_mm", 
                           "snowfall_sum"="snow_cm", 
                           "time"="date"
                           )
               ) %>%
    mutate(date=as.Date(date))
jsonDailyProcess
## # A tibble: 373 × 4
##    date       precip_mm rain_mm snow_cm
##    <date>         <dbl>   <dbl>   <dbl>
##  1 2022-06-01       3.8     3.8       0
##  2 2022-06-02       0       0         0
##  3 2022-06-03       0       0         0
##  4 2022-06-04       1.3     1.3       0
##  5 2022-06-05       0.3     0.3       0
##  6 2022-06-06      12.5    12.5       0
##  7 2022-06-07       2       2         0
##  8 2022-06-08      16      16         0
##  9 2022-06-09       0       0         0
## 10 2022-06-10       0.6     0.6       0
## # … with 363 more rows
# Check dates included
omDailyProcess %>% 
    select(date) %>% 
    mutate(inCSV=1) %>% 
    full_join(mutate(select(jsonDailyProcess, "date"), inJSON=1), by="date") %>%
    filter(!complete.cases(.))
## # A tibble: 8 × 3
##   date       inCSV inJSON
##   <date>     <dbl>  <dbl>
## 1 2022-06-01    NA      1
## 2 2022-06-02    NA      1
## 3 2022-06-03    NA      1
## 4 2022-06-04    NA      1
## 5 2022-06-05    NA      1
## 6 2022-06-06    NA      1
## 7 2022-06-07    NA      1
## 8 2023-06-08    NA      1
# Check column names
all.equal(names(omDailyProcess), names(jsonDailyProcess))
## [1] TRUE
# Check data elements from 2022-06-08 through 2023-06-04 (last full day of data)
all.equal(omDailyProcess %>% tibble::as_tibble() %>% filter(date>="2022-06-08", date<="2023-06-04"), 
          jsonDailyProcess %>% filter(date>="2022-06-08", date<="2023-06-04")
          )
## [1] TRUE

Hourly data read from JSON and CSV are compared:

# Convert variable names in JSON hourly data
jsonHourlyProcess <- jsonHourly %>% 
    select(-time) %>%
    colRenamer(vecRename=c("temperature_2m"="temp2m_C", 
                           "relativehumidity_2m"="relH2m", 
                           "dewpoint_2m"="dew2m_C", 
                           "precipitation"="precip_mm", 
                           "rain"="rain_mm", 
                           "snowfall"="snow_cm",
                           "tm"="time"
                           )
               ) %>% 
    select(time, everything())
jsonHourlyProcess
## # A tibble: 8,952 × 8
##    time                temp2…¹ relH2m dew2m_C preci…² rain_mm snow_cm date      
##    <dttm>                <dbl>  <int>   <dbl>   <dbl>   <dbl>   <dbl> <date>    
##  1 2022-06-01 00:00:00    21       92    19.6     0.1     0.1       0 2022-06-01
##  2 2022-06-01 01:00:00    20.6     93    19.5     0.3     0.3       0 2022-06-01
##  3 2022-06-01 02:00:00    21       93    19.8     0       0         0 2022-06-01
##  4 2022-06-01 03:00:00    20.8     93    19.7     0       0         0 2022-06-01
##  5 2022-06-01 04:00:00    20.5     93    19.4     0       0         0 2022-06-01
##  6 2022-06-01 05:00:00    19.8     95    19       0.7     0.7       0 2022-06-01
##  7 2022-06-01 06:00:00    19.3     97    18.8     1.6     1.6       0 2022-06-01
##  8 2022-06-01 07:00:00    19       97    18.4     1       1         0 2022-06-01
##  9 2022-06-01 08:00:00    18.1     92    16.9     0.1     0.1       0 2022-06-01
## 10 2022-06-01 09:00:00    16.8     87    14.6     0       0         0 2022-06-01
## # … with 8,942 more rows, and abbreviated variable names ¹​temp2m_C, ²​precip_mm
# Check dates included
omHourlyProcess %>% 
    count(date, name="nCSV") %>% 
    full_join(count(jsonHourlyProcess, date, name="nJSON"), by="date") %>%
    filter(!complete.cases(.))
## # A tibble: 8 × 3
##   date        nCSV nJSON
##   <date>     <int> <int>
## 1 2022-06-01    NA    24
## 2 2022-06-02    NA    24
## 3 2022-06-03    NA    24
## 4 2022-06-04    NA    24
## 5 2022-06-05    NA    24
## 6 2022-06-06    NA    24
## 7 2022-06-07    NA    24
## 8 2023-06-08    NA    24
# Check column names
all.equal(names(omHourlyProcess), names(jsonHourlyProcess))
## [1] TRUE
# Check data elements from 2022-06-08 through 2023-06-04 (last full day of data)
all.equal(omHourlyProcess %>% tibble::as_tibble() %>% filter(date>="2022-06-08", date<="2023-06-04"), 
          jsonHourlyProcess %>% filter(date>="2022-06-08", date<="2023-06-04")
          )
## [1] TRUE

Metrics that can be reuested for hourly and daily data include:

hourlyMetrics <- "temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm"
dailyMetrics <- "weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration"

hourlyDescription <- "Air temperature at 2 meters above ground\nRelative humidity at 2 meters above ground\nDew point temperature at 2 meters above ground\nApparent temperature is the perceived feels-like temperature combining wind chill factor, relative humidity and solar radiation\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nAtmospheric air pressure reduced to mean sea level (msl) or pressure at surface. Typically pressure on mean sea level is used in meteorology. Surface pressure gets lower with increasing elevation.\nTotal precipitation (rain, showers, snow) sum of the preceding hour. Data is stored with a 0.1 mm precision. If precipitation data is summed up to monthly sums, there might be small inconsistencies with the total precipitation amount.\nOnly liquid precipitation of the preceding hour including local showers and rain from large scale systems.\nSnowfall amount of the preceding hour in centimeters. For the water equivalent in millimeter, divide by 7. E.g. 7 cm snow = 10 mm precipitation water equivalent\nTotal cloud cover as an area fraction\nLow level clouds and fog up to 2 km altitude\nMid level clouds from 2 to 6 km altitude\nHigh level clouds from 6 km altitude\nShortwave solar radiation as average of the preceding hour. This is equal to the total global horizontal irradiation\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDirect solar radiation as average of the preceding hour on the horizontal plane and the normal plane (perpendicular to the sun)\nDiffuse solar radiation as average of the preceding hour\nWind speed at 10 or 100 meters above ground. Wind speed on 10 meters is the standard level.\nWind speed at 10 or 100 meters above ground. Wind speed on 10 meters is the standard level.\nWind direction at 10 or 100 meters above ground\nWind direction at 10 or 100 meters above ground\nGusts at 10 meters above ground of the indicated hour. Wind gusts in CERRA are defined as the maximum wind gusts of the preceding hour. Please consult the ECMWF IFS documentation for more information on how wind gusts are parameterized in weather models.\nET0 Reference Evapotranspiration of a well watered grass field. Based on FAO-56 Penman-Monteith equations ET0 is calculated from temperature, wind speed, humidity and solar radiation. Unlimited soil water is assumed. ET0 is commonly used to estimate the required irrigation for plants.\nWeather condition as a numeric code. Follow WMO weather interpretation codes. See table below for details. Weather code is calculated from cloud cover analysis, precipitation and snowfall. As barely no information about atmospheric stability is available, estimation about thunderstorms is not possible.\nVapor Pressure Deificit (VPD) in kilopascal (kPa). For high VPD (>1.6), water transpiration of plants increases. For low VPD (<0.4), transpiration decreases\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage temperature of different soil levels below ground.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths.\nAverage soil water content as volumetric mixing ratio at 0-7, 7-28, 28-100 and 100-255 cm depths."
dailyDescription <- "The most severe weather condition on a given day\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily air temperature at 2 meters above ground\nMaximum and minimum daily apparent temperature\nMaximum and minimum daily apparent temperature\nSum of daily precipitation (including rain, showers and snowfall)\nSum of daily rain\nSum of daily snowfall\nThe number of hours with rain\nSun rise and set times\nSun rise and set times\nMaximum wind speed and gusts on a day\nMaximum wind speed and gusts on a day\nDominant wind direction\nThe sum of solar radiaion on a given day in Megajoules\nDaily sum of ET0 Reference Evapotranspiration of a well watered grass field"

# Create tibble for hourly metrics
tblMetricsHourly <- tibble::tibble(metric=hourlyMetrics %>% str_split_1(","), 
                                   description=hourlyDescription %>% str_split_1("\n")
                                   )
tblMetricsHourly %>% 
    print(n=50)
## # A tibble: 33 × 2
##    metric                        description                                    
##    <chr>                         <chr>                                          
##  1 temperature_2m                Air temperature at 2 meters above ground       
##  2 relativehumidity_2m           Relative humidity at 2 meters above ground     
##  3 dewpoint_2m                   Dew point temperature at 2 meters above ground 
##  4 apparent_temperature          Apparent temperature is the perceived feels-li…
##  5 pressure_msl                  Atmospheric air pressure reduced to mean sea l…
##  6 surface_pressure              Atmospheric air pressure reduced to mean sea l…
##  7 precipitation                 Total precipitation (rain, showers, snow) sum …
##  8 rain                          Only liquid precipitation of the preceding hou…
##  9 snowfall                      Snowfall amount of the preceding hour in centi…
## 10 cloudcover                    Total cloud cover as an area fraction          
## 11 cloudcover_low                Low level clouds and fog up to 2 km altitude   
## 12 cloudcover_mid                Mid level clouds from 2 to 6 km altitude       
## 13 cloudcover_high               High level clouds from 6 km altitude           
## 14 shortwave_radiation           Shortwave solar radiation as average of the pr…
## 15 direct_radiation              Direct solar radiation as average of the prece…
## 16 direct_normal_irradiance      Direct solar radiation as average of the prece…
## 17 diffuse_radiation             Diffuse solar radiation as average of the prec…
## 18 windspeed_10m                 Wind speed at 10 or 100 meters above ground. W…
## 19 windspeed_100m                Wind speed at 10 or 100 meters above ground. W…
## 20 winddirection_10m             Wind direction at 10 or 100 meters above ground
## 21 winddirection_100m            Wind direction at 10 or 100 meters above ground
## 22 windgusts_10m                 Gusts at 10 meters above ground of the indicat…
## 23 et0_fao_evapotranspiration    ET0 Reference Evapotranspiration of a well wat…
## 24 weathercode                   Weather condition as a numeric code. Follow WM…
## 25 vapor_pressure_deficit        Vapor Pressure Deificit (VPD) in kilopascal (k…
## 26 soil_temperature_0_to_7cm     Average temperature of different soil levels b…
## 27 soil_temperature_7_to_28cm    Average temperature of different soil levels b…
## 28 soil_temperature_28_to_100cm  Average temperature of different soil levels b…
## 29 soil_temperature_100_to_255cm Average temperature of different soil levels b…
## 30 soil_moisture_0_to_7cm        Average soil water content as volumetric mixin…
## 31 soil_moisture_7_to_28cm       Average soil water content as volumetric mixin…
## 32 soil_moisture_28_to_100cm     Average soil water content as volumetric mixin…
## 33 soil_moisture_100_to_255cm    Average soil water content as volumetric mixin…
# Create tibble for daily metrics
tblMetricsDaily <- tibble::tibble(metric=dailyMetrics %>% str_split_1(","), 
                                  description=dailyDescription %>% str_split_1("\n")
                                   )
tblMetricsDaily
## # A tibble: 16 × 2
##    metric                     description                                       
##    <chr>                      <chr>                                             
##  1 weathercode                The most severe weather condition on a given day  
##  2 temperature_2m_max         Maximum and minimum daily air temperature at 2 me…
##  3 temperature_2m_min         Maximum and minimum daily air temperature at 2 me…
##  4 apparent_temperature_max   Maximum and minimum daily apparent temperature    
##  5 apparent_temperature_min   Maximum and minimum daily apparent temperature    
##  6 precipitation_sum          Sum of daily precipitation (including rain, showe…
##  7 rain_sum                   Sum of daily rain                                 
##  8 snowfall_sum               Sum of daily snowfall                             
##  9 precipitation_hours        The number of hours with rain                     
## 10 sunrise                    Sun rise and set times                            
## 11 sunset                     Sun rise and set times                            
## 12 windspeed_10m_max          Maximum wind speed and gusts on a day             
## 13 windgusts_10m_max          Maximum wind speed and gusts on a day             
## 14 winddirection_10m_dominant Dominant wind direction                           
## 15 shortwave_radiation_sum    The sum of solar radiaion on a given day in Megaj…
## 16 et0_fao_evapotranspiration Daily sum of ET0 Reference Evapotranspiration of …

Data can then be assembled into a string that is compatible with the Open-Meteo API format:

openMeteoURLCreate <- function(mainURL="https://archive-api.open-meteo.com/v1/archive", 
                               lat=45, 
                               lon=-90, 
                               startDate=paste(year(Sys.Date())-1, "01", "01", sep="-"), 
                               endDate=paste(year(Sys.Date())-1, "12", "31", sep="-"), 
                               hourlyMetrics=NULL, 
                               dailyMetrics=NULL,
                               tz="GMT", 
                               ...
                               ) {
    
    # Create formatted string
    fString <- paste0(mainURL, 
                      "?latitude=", 
                      lat, 
                      "&longitude=", 
                      lon, 
                      "&start_date=", 
                      startDate, 
                      "&end_date=", 
                      endDate
                      )
    if(!is.null(hourlyMetrics)) fString <- paste0(fString, "&hourly=", hourlyMetrics)
    if(!is.null(dailyMetrics)) fString <- paste0(fString, "&daily=", dailyMetrics)
    
    # Return the formatted string
    paste0(fString, "&timezone=", stringr::str_replace(tz, "/", "%2F"), ...)
    
}

# Blank example
openMeteoURLCreate()
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=45&longitude=-90&start_date=2022-01-01&end_date=2022-12-31&timezone=GMT"
# Matching previous CSV data pull
openMeteoURLCreate(lat=41.85, 
                   lon=-87.65, 
                   startDate="2022-06-01", 
                   endDate="2023-06-08", 
                   hourlyMetrics="temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall", 
                   dailyMetrics="precipitation_sum,rain_sum,snowfall_sum", 
                   tz="America/Chicago"
                   )
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.85&longitude=-87.65&start_date=2022-06-01&end_date=2023-06-08&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall&daily=precipitation_sum,rain_sum,snowfall_sum&timezone=America%2FChicago"

A helper function is created to convert cities to lat/lon and to allow for selection of hourly and daily metrics by index number:

helperOpenMeteoURL <- function(cityName=NULL,
                               lat=NULL,
                               lon=NULL,
                               hourlyMetrics=NULL,
                               hourlyIndices=NULL,
                               hourlyDesc=tblMetricsHourly,
                               dailyMetrics=NULL,
                               dailyIndices=NULL,
                               dailyDesc=tblMetricsDaily,
                               startDate=NULL, 
                               endDate=NULL, 
                               tz=NULL,
                               ...
                               ) {
    
    # Convert city to lat/lon if lat/lon are NULL
    if(is.null(lat) | is.null(lon)) {
        if(is.null(cityName)) stop("\nMust provide lat/lon or city name available in maps::us.cities\n")
        cityData <- maps::us.cities %>% tibble::as_tibble() %>% filter(name==cityName)
        if(nrow(cityData)!=1) stop("\nMust provide city name that maps uniquely to maps::us.cities$name\n")
        lat <- cityData$lat[1]
        lon <- cityData$long[1]
    }
    
    # Get hourly metrics by index if relevant
    if(is.null(hourlyMetrics) & !is.null(hourlyIndices)) {
        hourlyMetrics <- hourlyDesc %>% slice(hourlyIndices) %>% pull(metric)
        hourlyMetrics <- paste0(hourlyMetrics, collapse=",")
        cat("\nHourly metrics created from indices:", hourlyMetrics, "\n\n")
    }
    
    # Get daily metrics by index if relevant
    if(is.null(dailyMetrics) & !is.null(dailyIndices)) {
        dailyMetrics <- dailyDesc %>% slice(dailyIndices) %>% pull(metric)
        dailyMetrics <- paste0(dailyMetrics, collapse=",")
        cat("\nDaily metrics created from indices:", dailyMetrics, "\n\n")
    }
    
    # Use default values from OpenMeteoURLCreate() for startDate, endDate, and tz if passed as NULL
    if(is.null(startDate)) startDate <- eval(formals(openMeteoURLCreate)$startDate)
    if(is.null(endDate)) endDate <- eval(formals(openMeteoURLCreate)$endDate)
    if(is.null(tz)) tz <- eval(formals(openMeteoURLCreate)$tz)
    
    # Create and return URL
    openMeteoURLCreate(lat=lat,
                       lon=lon, 
                       startDate=startDate, 
                       endDate=endDate, 
                       hourlyMetrics=hourlyMetrics, 
                       dailyMetrics=dailyMetrics, 
                       tz=tz,
                       ...
                       )
    
}

The URL is tested for file download, cached to avoid multiple hits to the server:

testURL <- helperOpenMeteoURL(cityName="Chicago IL", 
                              hourlyIndices=c(1:3, 7:9),
                              dailyIndices=6:8,
                              startDate="2022-06-01", 
                              endDate="2023-06-08", 
                              tz="America/Chicago"
                              )
## 
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall 
## 
## 
## Daily metrics created from indices: precipitation_sum,rain_sum,snowfall_sum
testURL
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.84&longitude=-87.68&start_date=2022-06-01&end_date=2023-06-08&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,precipitation,rain,snowfall&daily=precipitation_sum,rain_sum,snowfall_sum&timezone=America%2FChicago"
# Download file
if(!file.exists("notuse_testOM.json")) {
    fileDownload(fileName="notuse_testOM.json", url=testURL)
} else {
    cat("\nFile notuse_testOM.json already exists, skipping download\n")
}
##                      size isdir mode               mtime               ctime
## notuse_testOM.json 426872 FALSE  666 2023-06-19 08:56:49 2023-06-19 08:56:45
##                                  atime exe
## notuse_testOM.json 2023-06-19 08:56:49  no

Code is created to read the JSON return object:

readOpenMeteoJSON <- function(js) {
    
    # FUNCTION arguments: 
    # js: JSON list returned by download from Open-Meteo
    
    # Get the object and names
    jsObj <- jsonlite::read_json(js, simplifyVector = TRUE)
    nms <- jsObj %>% names()
    cat("\nObjects in JSON include:", paste(nms, collapse=", "), "\n\n")
    
    # Set default objects as NULL
    tblDaily <- NULL
    tblHourly <- NULL
    tblUnitsDaily <- NULL
    tblUnitsHourly <- NULL
    
    # Get daily and hourly as tibble if relevant
    if("daily" %in% nms) tblDaily <- jsObj$daily %>% tibble::as_tibble()
    if("hourly" %in% nms) tblHourly <- jsObj$hourly %>% tibble::as_tibble()
    
    # Helper function for unit conversions
    helperMetricUnit <- function(x, mapper, desc) {
        x %>% 
            tibble::as_tibble() %>% 
            pivot_longer(cols=everything()) %>% 
            left_join(mapper, by=c("name"="metric")) %>% 
            mutate(value=stringr::str_replace(value, "\u00b0", "deg ")) %>% 
            mutate(metricType=desc) %>% 
            select(metricType, everything())
    }
    
    # Get the unit descriptions
    if("daily_units" %in% nms) 
        tblUnitsDaily <- helperMetricUnit(jsObj$daily_units, tblMetricsDaily, desc="daily_units")
    if("hourly_units" %in% nms) 
        tblUnitsHourly <- helperMetricUnit(jsObj$hourly_units, tblMetricsHourly, desc="hourly_units")
    if(is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) tblUnits <- tblUnitsHourly
    else if(!is.null(tblUnitsDaily) & is.null(tblUnitsHourly)) tblUnits <- tblUnitsDaily
    else if(!is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) 
        tblUnits <- bind_rows(tblUnitsHourly, tblUnitsDaily)
    else tblUnits <- NULL
    
    # Put everything else together
    tblDescription <- jsObj[setdiff(nms, c("hourly", "hourly_units", "daily", "daily_units"))] %>%
        tibble::as_tibble()
    
    # Return the list objects
    list(tblDaily=tblDaily, tblHourly=tblHourly, tblUnits=tblUnits, tblDescription=tblDescription)
    
}

prettyOpenMeteoMeta <- function(df, extr="tblDescription") {
    if("list" %in% class(df)) df <- df[[extr]]
    for(name in names(df)) {
        cat("\n", name, ": ", df %>% pull(name), sep="")
    }
    cat("\n\n")
}


tmpOM <- readOpenMeteoJSON("notuse_testOM.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly, daily_units, daily
tmpOM
## $tblDaily
## # A tibble: 373 × 4
##    time       precipitation_sum rain_sum snowfall_sum
##    <chr>                  <dbl>    <dbl>        <dbl>
##  1 2022-06-01               3.8      3.8            0
##  2 2022-06-02               0        0              0
##  3 2022-06-03               0        0              0
##  4 2022-06-04               1.3      1.3            0
##  5 2022-06-05               0.3      0.3            0
##  6 2022-06-06              12.5     12.5            0
##  7 2022-06-07               2        2              0
##  8 2022-06-08              16       16              0
##  9 2022-06-09               0        0              0
## 10 2022-06-10               0.6      0.6            0
## # … with 363 more rows
## 
## $tblHourly
## # A tibble: 8,952 × 7
##    time             temperature_2m relativehumid…¹ dewpo…² preci…³  rain snowf…⁴
##    <chr>                     <dbl>           <int>   <dbl>   <dbl> <dbl>   <dbl>
##  1 2022-06-01T00:00           21                92    19.7     0.1   0.1       0
##  2 2022-06-01T01:00           20.6              94    19.6     0.3   0.3       0
##  3 2022-06-01T02:00           21.1              93    19.9     0     0         0
##  4 2022-06-01T03:00           20.8              93    19.7     0     0         0
##  5 2022-06-01T04:00           20.5              93    19.3     0     0         0
##  6 2022-06-01T05:00           19.7              95    19       0.7   0.7       0
##  7 2022-06-01T06:00           19.4              96    18.8     1.6   1.6       0
##  8 2022-06-01T07:00           19.2              96    18.5     1     1         0
##  9 2022-06-01T08:00           18.6              90    17       0.1   0.1       0
## 10 2022-06-01T09:00           17.7              84    14.9     0     0         0
## # … with 8,942 more rows, and abbreviated variable names ¹​relativehumidity_2m,
## #   ²​dewpoint_2m, ³​precipitation, ⁴​snowfall
## 
## $tblUnits
## # A tibble: 11 × 4
##    metricType   name                value   description                         
##    <chr>        <chr>               <chr>   <chr>                               
##  1 hourly_units time                iso8601 <NA>                                
##  2 hourly_units temperature_2m      deg C   Air temperature at 2 meters above g…
##  3 hourly_units relativehumidity_2m %       Relative humidity at 2 meters above…
##  4 hourly_units dewpoint_2m         deg C   Dew point temperature at 2 meters a…
##  5 hourly_units precipitation       mm      Total precipitation (rain, showers,…
##  6 hourly_units rain                mm      Only liquid precipitation of the pr…
##  7 hourly_units snowfall            cm      Snowfall amount of the preceding ho…
##  8 daily_units  time                iso8601 <NA>                                
##  9 daily_units  precipitation_sum   mm      Sum of daily precipitation (includi…
## 10 daily_units  rain_sum            mm      Sum of daily rain                   
## 11 daily_units  snowfall_sum        cm      Sum of daily snowfall               
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
##      <dbl>     <dbl>             <dbl>             <int> <chr>   <chr>     <dbl>
## 1     41.8     -87.7              2.66            -18000 Americ… CDT         180
## # … with abbreviated variable names ¹​utc_offset_seconds, ²​timezone,
## #   ³​timezone_abbreviation, ⁴​elevation
prettyOpenMeteoMeta(tmpOM)
## 
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 2.658963
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180

Conversion functions are written for hourly and daily data:

omProcessDaily <- function(tbl, extr="tblDaily") {
    if("list" %in% class(tbl)) tbl <- tbl[[extr]]
    tbl %>% mutate(date=lubridate::ymd(time)) %>% select(date, everything())
}

omProcessHourly <- function(tbl, extr="tblHourly") {
    if("list" %in% class(tbl)) tbl <- tbl[[extr]]
    tbl %>% 
        mutate(origTime=time, 
               time=lubridate::ymd_hm(time), 
               date=lubridate::date(time), 
               hour=lubridate::hour(time)
               ) %>% 
        select(time, date, hour, everything())
}

omProcessDaily(tmpOM)
## # A tibble: 373 × 5
##    date       time       precipitation_sum rain_sum snowfall_sum
##    <date>     <chr>                  <dbl>    <dbl>        <dbl>
##  1 2022-06-01 2022-06-01               3.8      3.8            0
##  2 2022-06-02 2022-06-02               0        0              0
##  3 2022-06-03 2022-06-03               0        0              0
##  4 2022-06-04 2022-06-04               1.3      1.3            0
##  5 2022-06-05 2022-06-05               0.3      0.3            0
##  6 2022-06-06 2022-06-06              12.5     12.5            0
##  7 2022-06-07 2022-06-07               2        2              0
##  8 2022-06-08 2022-06-08              16       16              0
##  9 2022-06-09 2022-06-09               0        0              0
## 10 2022-06-10 2022-06-10               0.6      0.6            0
## # … with 363 more rows
omProcessHourly(tmpOM)
## # A tibble: 8,952 × 10
##    time                date        hour temperat…¹ relat…² dewpo…³ preci…⁴  rain
##    <dttm>              <date>     <int>      <dbl>   <int>   <dbl>   <dbl> <dbl>
##  1 2022-06-01 00:00:00 2022-06-01     0       21        92    19.7     0.1   0.1
##  2 2022-06-01 01:00:00 2022-06-01     1       20.6      94    19.6     0.3   0.3
##  3 2022-06-01 02:00:00 2022-06-01     2       21.1      93    19.9     0     0  
##  4 2022-06-01 03:00:00 2022-06-01     3       20.8      93    19.7     0     0  
##  5 2022-06-01 04:00:00 2022-06-01     4       20.5      93    19.3     0     0  
##  6 2022-06-01 05:00:00 2022-06-01     5       19.7      95    19       0.7   0.7
##  7 2022-06-01 06:00:00 2022-06-01     6       19.4      96    18.8     1.6   1.6
##  8 2022-06-01 07:00:00 2022-06-01     7       19.2      96    18.5     1     1  
##  9 2022-06-01 08:00:00 2022-06-01     8       18.6      90    17       0.1   0.1
## 10 2022-06-01 09:00:00 2022-06-01     9       17.7      84    14.9     0     0  
## # … with 8,942 more rows, 2 more variables: snowfall <dbl>, origTime <chr>, and
## #   abbreviated variable names ¹​temperature_2m, ²​relativehumidity_2m,
## #   ³​dewpoint_2m, ⁴​precipitation

Function readOpenMeteoJSON() is updated to automatically incorporate date conversions:

readOpenMeteoJSON <- function(js, mapDaily=tblMetricsDaily, mapHourly=tblMetricsHourly) {
    
    # FUNCTION arguments: 
    # js: JSON list returned by download from Open-Meteo
    # mapDaily: mapping file for daily metrics
    # mapHourly: mapping file for hourly metrics
    
    # Get the object and names
    jsObj <- jsonlite::read_json(js, simplifyVector = TRUE)
    nms <- jsObj %>% names()
    cat("\nObjects in JSON include:", paste(nms, collapse=", "), "\n\n")
    
    # Set default objects as NULL
    tblDaily <- NULL
    tblHourly <- NULL
    tblUnitsDaily <- NULL
    tblUnitsHourly <- NULL
    
    # Get daily and hourly as tibble if relevant
    if("daily" %in% nms) tblDaily <- jsObj$daily %>% tibble::as_tibble() %>% omProcessDaily()
    if("hourly" %in% nms) tblHourly <- jsObj$hourly %>% tibble::as_tibble() %>% omProcessHourly()
    
    # Helper function for unit conversions
    helperMetricUnit <- function(x, mapper, desc=NULL) {
        if(is.null(desc)) 
            desc <- as.list(match.call())$x %>% 
                deparse() %>% 
                stringr::str_replace_all(pattern=".*\\$", replacement="")
        x %>% 
            tibble::as_tibble() %>% 
            pivot_longer(cols=everything()) %>% 
            left_join(mapper, by=c("name"="metric")) %>% 
            mutate(value=stringr::str_replace(value, "\u00b0", "deg ")) %>% 
            mutate(metricType=desc) %>% 
            select(metricType, everything())
    }
    
    # Get the unit descriptions
    if("daily_units" %in% nms) tblUnitsDaily <- helperMetricUnit(jsObj$daily_units, mapDaily)
    if("hourly_units" %in% nms) tblUnitsHourly <- helperMetricUnit(jsObj$hourly_units, mapHourly)
    if(is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) tblUnits <- tblUnitsHourly
    else if(!is.null(tblUnitsDaily) & is.null(tblUnitsHourly)) tblUnits <- tblUnitsDaily
    else if(!is.null(tblUnitsDaily) & !is.null(tblUnitsHourly)) 
        tblUnits <- bind_rows(tblUnitsHourly, tblUnitsDaily)
    else tblUnits <- NULL
    
    # Put everything else together
    tblDescription <- jsObj[setdiff(nms, c("hourly", "hourly_units", "daily", "daily_units"))] %>%
        tibble::as_tibble()
    
    # Return the list objects
    list(tblDaily=tblDaily, tblHourly=tblHourly, tblUnits=tblUnits, tblDescription=tblDescription)
    
}

tmpOM2 <- readOpenMeteoJSON("notuse_testOM.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly, daily_units, daily
tmpOM2
## $tblDaily
## # A tibble: 373 × 5
##    date       time       precipitation_sum rain_sum snowfall_sum
##    <date>     <chr>                  <dbl>    <dbl>        <dbl>
##  1 2022-06-01 2022-06-01               3.8      3.8            0
##  2 2022-06-02 2022-06-02               0        0              0
##  3 2022-06-03 2022-06-03               0        0              0
##  4 2022-06-04 2022-06-04               1.3      1.3            0
##  5 2022-06-05 2022-06-05               0.3      0.3            0
##  6 2022-06-06 2022-06-06              12.5     12.5            0
##  7 2022-06-07 2022-06-07               2        2              0
##  8 2022-06-08 2022-06-08              16       16              0
##  9 2022-06-09 2022-06-09               0        0              0
## 10 2022-06-10 2022-06-10               0.6      0.6            0
## # … with 363 more rows
## 
## $tblHourly
## # A tibble: 8,952 × 10
##    time                date        hour temperat…¹ relat…² dewpo…³ preci…⁴  rain
##    <dttm>              <date>     <int>      <dbl>   <int>   <dbl>   <dbl> <dbl>
##  1 2022-06-01 00:00:00 2022-06-01     0       21        92    19.7     0.1   0.1
##  2 2022-06-01 01:00:00 2022-06-01     1       20.6      94    19.6     0.3   0.3
##  3 2022-06-01 02:00:00 2022-06-01     2       21.1      93    19.9     0     0  
##  4 2022-06-01 03:00:00 2022-06-01     3       20.8      93    19.7     0     0  
##  5 2022-06-01 04:00:00 2022-06-01     4       20.5      93    19.3     0     0  
##  6 2022-06-01 05:00:00 2022-06-01     5       19.7      95    19       0.7   0.7
##  7 2022-06-01 06:00:00 2022-06-01     6       19.4      96    18.8     1.6   1.6
##  8 2022-06-01 07:00:00 2022-06-01     7       19.2      96    18.5     1     1  
##  9 2022-06-01 08:00:00 2022-06-01     8       18.6      90    17       0.1   0.1
## 10 2022-06-01 09:00:00 2022-06-01     9       17.7      84    14.9     0     0  
## # … with 8,942 more rows, 2 more variables: snowfall <dbl>, origTime <chr>, and
## #   abbreviated variable names ¹​temperature_2m, ²​relativehumidity_2m,
## #   ³​dewpoint_2m, ⁴​precipitation
## 
## $tblUnits
## # A tibble: 11 × 4
##    metricType   name                value   description                         
##    <chr>        <chr>               <chr>   <chr>                               
##  1 hourly_units time                iso8601 <NA>                                
##  2 hourly_units temperature_2m      deg C   Air temperature at 2 meters above g…
##  3 hourly_units relativehumidity_2m %       Relative humidity at 2 meters above…
##  4 hourly_units dewpoint_2m         deg C   Dew point temperature at 2 meters a…
##  5 hourly_units precipitation       mm      Total precipitation (rain, showers,…
##  6 hourly_units rain                mm      Only liquid precipitation of the pr…
##  7 hourly_units snowfall            cm      Snowfall amount of the preceding ho…
##  8 daily_units  time                iso8601 <NA>                                
##  9 daily_units  precipitation_sum   mm      Sum of daily precipitation (includi…
## 10 daily_units  rain_sum            mm      Sum of daily rain                   
## 11 daily_units  snowfall_sum        cm      Sum of daily snowfall               
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
##      <dbl>     <dbl>             <dbl>             <int> <chr>   <chr>     <dbl>
## 1     41.8     -87.7              2.66            -18000 Americ… CDT         180
## # … with abbreviated variable names ¹​utc_offset_seconds, ²​timezone,
## #   ³​timezone_abbreviation, ⁴​elevation
prettyOpenMeteoMeta(tmpOM2)
## 
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 2.658963
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
identical(tmpOM$tblUnits, tmpOM2$tblUnits)
## [1] TRUE
identical(tmpOM$tblDescription, tmpOM2$tblDescription)
## [1] TRUE
identical(tmpOM$tblDaily %>% omProcessDaily(), tmpOM2$tblDaily)
## [1] TRUE
identical(tmpOM$tblHourly %>% omProcessHourly(), tmpOM2$tblHourly)
## [1] TRUE

The daily data is tested for file download, cached to avoid multiple hits to the server:

testURLDaily <- helperOpenMeteoURL(cityName="Chicago IL", 
                                   dailyIndices=1:nrow(tblMetricsDaily),
                                   startDate="2010-01-01", 
                                   endDate="2023-06-15", 
                                   tz="America/Chicago"
                                   )
## 
## Daily metrics created from indices: weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
testURLDaily
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.84&longitude=-87.68&start_date=2010-01-01&end_date=2023-06-15&daily=weathercode,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,sunrise,sunset,windspeed_10m_max,windgusts_10m_max,winddirection_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration&timezone=America%2FChicago"
# Download file
if(!file.exists("notuse_testOM_daily.json")) {
    fileDownload(fileName="notuse_testOM_daily.json", url=testURLDaily)
} else {
    cat("\nFile notuse_testOM_daily.json already exists, skipping download\n")
}
##                            size isdir mode               mtime
## notuse_testOM_daily.json 573218 FALSE  666 2023-06-23 07:53:04
##                                        ctime               atime exe
## notuse_testOM_daily.json 2023-06-23 07:52:59 2023-06-23 07:53:04  no

Data are read and stored as a list:

tmpOMDaily <- readOpenMeteoJSON("notuse_testOM_daily.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, daily_units, daily
tmpOMDaily
## $tblDaily
## # A tibble: 4,914 × 18
##    date       time       weath…¹ tempe…² tempe…³ appar…⁴ appar…⁵ preci…⁶ rain_…⁷
##    <date>     <chr>        <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 2010-01-01 2010-01-01       3    -8.6   -13.4   -14.9   -19.6     0         0
##  2 2010-01-02 2010-01-02       2   -10.4   -15.1   -17.5   -21.7     0         0
##  3 2010-01-03 2010-01-03       3    -7.9   -13.8   -13.6   -20.1     0         0
##  4 2010-01-04 2010-01-04       3    -6.9   -12.3   -12.8   -18.9     0         0
##  5 2010-01-05 2010-01-05       3    -4.8    -9.8   -10.1   -15.7     0         0
##  6 2010-01-06 2010-01-06      71    -4.9    -9      -9.2   -14.4     0         0
##  7 2010-01-07 2010-01-07      73    -5.2    -8.5    -9.3   -13       7.5       0
##  8 2010-01-08 2010-01-08      73    -3      -9.4    -9.2   -15.3     2.3       0
##  9 2010-01-09 2010-01-09       3    -5.8   -12.3   -10.8   -18.2     0         0
## 10 2010-01-10 2010-01-10       3    -8.8   -19.4   -16.2   -25.6     0         0
## # … with 4,904 more rows, 9 more variables: snowfall_sum <dbl>,
## #   precipitation_hours <dbl>, sunrise <chr>, sunset <chr>,
## #   windspeed_10m_max <dbl>, windgusts_10m_max <dbl>,
## #   winddirection_10m_dominant <int>, shortwave_radiation_sum <dbl>,
## #   et0_fao_evapotranspiration <dbl>, and abbreviated variable names
## #   ¹​weathercode, ²​temperature_2m_max, ³​temperature_2m_min,
## #   ⁴​apparent_temperature_max, ⁵​apparent_temperature_min, ⁶​precipitation_sum, …
## 
## $tblHourly
## NULL
## 
## $tblUnits
## # A tibble: 17 × 4
##    metricType  name                       value      description                
##    <chr>       <chr>                      <chr>      <chr>                      
##  1 daily_units time                       "iso8601"  <NA>                       
##  2 daily_units weathercode                "wmo code" The most severe weather co…
##  3 daily_units temperature_2m_max         "deg C"    Maximum and minimum daily …
##  4 daily_units temperature_2m_min         "deg C"    Maximum and minimum daily …
##  5 daily_units apparent_temperature_max   "deg C"    Maximum and minimum daily …
##  6 daily_units apparent_temperature_min   "deg C"    Maximum and minimum daily …
##  7 daily_units precipitation_sum          "mm"       Sum of daily precipitation…
##  8 daily_units rain_sum                   "mm"       Sum of daily rain          
##  9 daily_units snowfall_sum               "cm"       Sum of daily snowfall      
## 10 daily_units precipitation_hours        "h"        The number of hours with r…
## 11 daily_units sunrise                    "iso8601"  Sun rise and set times     
## 12 daily_units sunset                     "iso8601"  Sun rise and set times     
## 13 daily_units windspeed_10m_max          "km/h"     Maximum wind speed and gus…
## 14 daily_units windgusts_10m_max          "km/h"     Maximum wind speed and gus…
## 15 daily_units winddirection_10m_dominant "deg "     Dominant wind direction    
## 16 daily_units shortwave_radiation_sum    "MJ/m²"    The sum of solar radiaion …
## 17 daily_units et0_fao_evapotranspiration "mm"       Daily sum of ET0 Reference…
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
##      <dbl>     <dbl>             <dbl>             <int> <chr>   <chr>     <dbl>
## 1     41.8     -87.7              508.            -18000 Americ… CDT         180
## # … with abbreviated variable names ¹​utc_offset_seconds, ²​timezone,
## #   ³​timezone_abbreviation, ⁴​elevation
prettyOpenMeteoMeta(tmpOMDaily)
## 
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 508.3281
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180
# Exploration of precipitation hours by day
tmpOMDaily$tblDaily %>% count(precipitation_hours) %>% print(n=30)
## # A tibble: 25 × 2
##    precipitation_hours     n
##                  <dbl> <int>
##  1                   0  2499
##  2                   1   287
##  3                   2   288
##  4                   3   231
##  5                   4   180
##  6                   5   201
##  7                   6   152
##  8                   7   157
##  9                   8   123
## 10                   9   121
## 11                  10    98
## 12                  11    90
## 13                  12    74
## 14                  13    53
## 15                  14    70
## 16                  15    41
## 17                  16    54
## 18                  17    48
## 19                  18    36
## 20                  19    31
## 21                  20    22
## 22                  21    11
## 23                  22    15
## 24                  23    18
## 25                  24    14
tmpOMDaily$tblDaily %>%
    filter(lubridate::year(date)<=2022) %>%
    ggplot(aes(x=precipitation_hours)) + 
    geom_density(aes(group=lubridate::year(date), color=as.factor(lubridate::year(date)))) + 
    scale_color_discrete("Year") + 
    labs(title="Hours of Precipitation per Day", x="Hours of Precipitation", y="Annual density")

tmpOMDaily$tblDaily %>%
    filter(lubridate::year(date)<=2022) %>%
    ggplot(aes(x=precipitation_hours)) + 
    geom_histogram(aes(fill=as.factor(lubridate::year(date))), bins=25) + 
    scale_fill_discrete("Year") + 
    facet_wrap(~lubridate::year(date)) +
    labs(title="Hours of Precipitation per Day", x="Hours of Precipitation", y="# Days")

Precipitation by month is explored:

dfPrecip <- tmpOMDaily$tblDaily %>%
    filter(lubridate::year(date)<=2022) %>%
    select(date, precipitation_sum, rain_sum, snowfall_sum) %>%
    mutate(month=factor(month.abb[lubridate::month(date)], levels=month.abb), 
           yyyymm=customYYYYMM(date)
           ) %>%
    group_by(yyyymm, month) %>%
    summarize(across(where(is.numeric), sum), n=n(), .groups="drop")
dfPrecip
## # A tibble: 156 × 6
##    yyyymm  month precipitation_sum rain_sum snowfall_sum     n
##    <chr>   <fct>             <dbl>    <dbl>        <dbl> <int>
##  1 2010-01 Jan                25.9     12.6        10.8     31
##  2 2010-02 Feb                36.1      0.1        28.6     28
##  3 2010-03 Mar                58       47.7         7.21    31
##  4 2010-04 Apr               100.     100.          0       30
##  5 2010-05 May               154.     154.          0       31
##  6 2010-06 Jun               226.     226.          0       30
##  7 2010-07 Jul               145.     145.          0       31
##  8 2010-08 Aug                66.4     66.4         0       31
##  9 2010-09 Sep               104.     104.          0       30
## 10 2010-10 Oct                60.7     60.7         0       31
## # … with 146 more rows
# Boxplot of precipitation by month
dfPrecip %>%
    select(-n) %>%
    pivot_longer(-c(yyyymm, month)) %>%
    ggplot(aes(x=month, y=ifelse(name=="snowfall_sum", 10*value, value))) + 
    geom_boxplot(fill="lightblue") + 
    facet_wrap(~name, scales="free_y") + 
    labs(title="Precipitation by month (2010-2022)", y="Precipitation (mm)", x=NULL) + 
    theme(axis.text.x = element_text(angle = 90)) + 
    lims(y=c(0, NA))

# Mean precipitation by month
dfPrecip %>%
    group_by(month) %>%
    summarize(across(where(is.numeric), mean)) %>%
    ggplot(aes(x=month)) + 
    geom_col(aes(y=precipitation_sum), fill="green") + 
    geom_col(aes(y=rain_sum), fill="lightblue") + 
    geom_text(aes(y=rain_sum/2, label=round(rain_sum))) +
    geom_text(aes(y=rain_sum/2 + precipitation_sum/2, 
                  label=ifelse(precipitation_sum>rain_sum+3, round(precipitation_sum-rain_sum), "")
                  )
              ) + 
    geom_text(aes(y=precipitation_sum+5, label=round(precipitation_sum))) +
    labs(x=NULL, 
         y="Precipitation (mm)", 
         title="Mean precipitation by month (2010-2022)", 
         subtitle="Light blue is mm falling as rain, green is liquid equivalent of other"
         )

Average temperatures by month are also explored:

dfTemp <- tmpOMDaily$tblDaily %>%
    filter(lubridate::year(date)<=2022) %>%
    select(date, 
           temperature_2m_max, 
           temperature_2m_min, 
           apparent_temperature_max, 
           apparent_temperature_min
           ) %>%
    mutate(month=factor(month.abb[lubridate::month(date)], levels=month.abb), 
           yyyymm=customYYYYMM(date)
           ) %>%
    group_by(yyyymm, month) %>%
    summarize(across(where(is.numeric), mean), n=n(), .groups="drop")
dfTemp
## # A tibble: 156 × 7
##    yyyymm  month temperature_2m_max temperature_2m_min apparent_…¹ appar…²     n
##    <chr>   <fct>              <dbl>              <dbl>       <dbl>   <dbl> <int>
##  1 2010-01 Jan               -2.46              -8.18        -7.64  -13.7     31
##  2 2010-02 Feb               -0.414             -7.31        -4.85  -12.4     28
##  3 2010-03 Mar                7.67              -0.452        4.25   -5.13    31
##  4 2010-04 Apr               16.9                7.40        14.6     3.56    30
##  5 2010-05 May               20.5               13.1         20.5    11.3     31
##  6 2010-06 Jun               26.0               19.1         28.0    19.5     30
##  7 2010-07 Jul               29.1               21.7         32.0    23.5     31
##  8 2010-08 Aug               28.6               21.5         31.2    23.0     31
##  9 2010-09 Sep               22.7               16.0         22.1    14.8     30
## 10 2010-10 Oct               18.1               10.3         15.4     7.14    31
## # … with 146 more rows, and abbreviated variable names
## #   ¹​apparent_temperature_max, ²​apparent_temperature_min
# Boxplot of precipitation by month
dfTemp %>%
    select(-n) %>%
    pivot_longer(-c(yyyymm, month)) %>%
    ggplot(aes(x=month, y=value)) + 
    geom_boxplot(fill="lightblue") + 
    facet_wrap(~name) + 
    labs(title="Average temperature by month (2010-2022)", y="Average temperature (C)", x=NULL) + 
    theme(axis.text.x = element_text(angle = 90))

# Mean temperatures by month
dfTemp %>%
    select(-n) %>%
    group_by(month) %>%
    summarize(across(where(is.numeric), mean)) %>%
    pivot_longer(cols=-c(month)) %>%
    mutate(measType=stringr::str_replace(name, ".*_", ""), 
           meas=ifelse(str_detect(name, "apparent"), "apparent", "actual")
           ) %>%
    select(-name) %>%
    pivot_wider(id_cols=c(month, meas), names_from="measType", values_from="value") %>%
    ggplot(aes(x=month)) + 
    geom_tile(aes(y=(max+min)/2, height=max-min), width=0.5, fill="lightblue") +
    geom_text(aes(y=max+1, label=round(max, 1)), size=2.5) +
    geom_text(aes(y=min-1, label=round(min, 1)), size=2.5) +
    labs(x=NULL, 
         y="Temperature (C)", 
         title="Mean high and low temperature by month (2010-2022)", 
         subtitle="Actual temperature and apparent temperature"
         ) + 
    facet_wrap(~meas)

Sunrise and sunset times are explored:

dfSun <- tmpOMDaily$tblDaily %>%
    filter(lubridate::year(date)<=2022) %>%
    select(date, sunrise, sunset) %>%
    mutate(month=factor(month.abb[lubridate::month(date)], levels=month.abb), 
           yyyymm=customYYYYMM(date),
           across(c(sunrise, sunset), lubridate::ymd_hm), 
           sr=hms::as_hms(sunrise), 
           ss=hms::as_hms(sunset), 
           doy=lubridate::yday(date), 
           year=lubridate::year(date)
           ) 
dfSun
## # A tibble: 4,748 × 9
##    date       sunrise             sunset              month yyyymm  sr     ss   
##    <date>     <dttm>              <dttm>              <fct> <chr>   <time> <tim>
##  1 2010-01-01 2010-01-01 08:16:00 2010-01-01 17:32:00 Jan   2010-01 08:16  17:32
##  2 2010-01-02 2010-01-02 08:16:00 2010-01-02 17:33:00 Jan   2010-01 08:16  17:33
##  3 2010-01-03 2010-01-03 08:16:00 2010-01-03 17:34:00 Jan   2010-01 08:16  17:34
##  4 2010-01-04 2010-01-04 08:16:00 2010-01-04 17:34:00 Jan   2010-01 08:16  17:34
##  5 2010-01-05 2010-01-05 08:16:00 2010-01-05 17:35:00 Jan   2010-01 08:16  17:35
##  6 2010-01-06 2010-01-06 08:16:00 2010-01-06 17:36:00 Jan   2010-01 08:16  17:36
##  7 2010-01-07 2010-01-07 08:16:00 2010-01-07 17:37:00 Jan   2010-01 08:16  17:37
##  8 2010-01-08 2010-01-08 08:16:00 2010-01-08 17:38:00 Jan   2010-01 08:16  17:38
##  9 2010-01-09 2010-01-09 08:16:00 2010-01-09 17:39:00 Jan   2010-01 08:16  17:39
## 10 2010-01-10 2010-01-10 08:15:00 2010-01-10 17:40:00 Jan   2010-01 08:15  17:40
## # … with 4,738 more rows, and 2 more variables: doy <dbl>, year <dbl>
# Plot of sunrise and sunset by day of year
dfSun %>%
    select(date, month, year, doy, sr, ss) %>%
    ggplot(aes(x=doy, group=factor(year), color=factor(year))) + 
    geom_line(aes(y=sr)) + 
    geom_line(aes(y=ss)) + 
    geom_line(aes(y=(ss+sr)/2)) +
    labs(x="Day of year", y="Time (always on DST)", title="Sunrise, sunset, and solar noon by day of year") + 
    scale_color_discrete("Year")

# Plot of minutes gained from earliest/latest
dfSun %>%
    select(date, month, year, doy, sr, ss) %>%
    group_by(year) %>%
    mutate(dsr=max(sr)-sr, dss=ss-min(ss)) %>%
    ungroup() %>%
    rename(sunrise_change=dsr, sunset_change=dss) %>%
    pivot_longer(cols=c(sunrise_change, sunset_change)) %>%
    ggplot(aes(x=doy)) + 
    geom_point(aes(y=as.numeric(value)/60, color=name), size=0.5) +
    labs(x="Day of year", y="Minutes", title="Delta from latest sunrise / earliest sunset") + 
    scale_color_discrete("Metric")

Wind data is explored:

dfWind <- tmpOMDaily$tblDaily %>% 
    select(date, 
           dir=winddirection_10m_dominant, 
           spd=windspeed_10m_max, 
           gst=windgusts_10m_max
           ) %>% 
    mutate(month=lubridate::month(date), 
           year=lubridate::year(date), 
           dir10=round(dir/10)*10, 
           spd5=round(spd/5)*5, 
           gst5=round(gst/5)*5
           ) 
dfWind
## # A tibble: 4,914 × 9
##    date         dir   spd   gst month  year dir10  spd5  gst5
##    <date>     <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 2010-01-01   291  23.4  38.2     1  2010   290    25    40
##  2 2010-01-02   309  24.3  40.3     1  2010   310    25    40
##  3 2010-01-03   313  21.6  35.6     1  2010   310    20    35
##  4 2010-01-04   304  21    35.3     1  2010   300    20    35
##  5 2010-01-05   298  19.8  33.1     1  2010   300    20    35
##  6 2010-01-06   280  16.4  27       1  2010   280    15    25
##  7 2010-01-07   240  16.3  29.5     1  2010   240    15    30
##  8 2010-01-08   334  27.6  45       1  2010   330    30    45
##  9 2010-01-09   305  17.2  28.1     1  2010   300    15    30
## 10 2010-01-10   226  27.9  46.4     1  2010   230    30    45
## # … with 4,904 more rows
# Plot of wind direction and speed
dfWind %>%
    ggplot(aes(x=dir, y=spd)) + 
    geom_point(alpha=0.2, size=0.5) + 
    coord_polar() + 
    facet_wrap(~factor(month.abb[month], levels=month.abb), nrow=2) + 
    geom_vline(xintercept=c(0, 90, 180, 270), lty=2, color="red") + 
    labs(title="Maximum wind speed and predominant direction (measured daily)", 
         y="Maximum Wind speed (km/h)", 
         x="Predominant Wind direction"
         ) + 
    scale_x_continuous(breaks=c(0, 90, 180, 270))

dfWind %>%
    filter(lubridate::year(date)<=2022) %>%
    count(month, dir10, spd5) %>%
    ggplot(aes(x=dir10, y=spd5)) + 
    geom_point(aes(size=n), alpha=0.2) + 
    coord_polar() + 
    facet_wrap(~factor(month.abb[month], levels=month.abb)) + 
    geom_vline(xintercept=c(0, 90, 180, 270), lty=2, color="red") + 
    labs(title="Maximum wind speed and predominant direction (measured daily)", 
         subtitle="Wind speed rounded to nearest 5 km/h, wind direction rounded to nearest 10 degrees",
         y="Maximum Wind speed (km/h)", 
         x="Predominant Wind direction"
         ) + 
    scale_x_continuous(breaks=c(0, 90, 180, 270))

# Plot of predominant wind direction
dfWind %>% 
    ggplot(aes(x=dir10)) + 
    geom_histogram(binwidth=10) + 
    facet_wrap(~factor(month.abb[month], levels=month.abb)) + 
    geom_vline(xintercept=c(0, 90, 180, 270, 360), lty=2, color="red") + 
    labs(title="Predominant wind direction (measured daily)", 
         y="# Days", 
         x="Predominant wind direction (rounded to nearest 10 degrees)"
         ) + 
    scale_x_continuous(breaks=c(0, 90, 180, 270, 360))

# Plot of maximum wind speed
dfWind %>% 
    ggplot(aes(x=spd5)) + 
    geom_histogram(binwidth=5) + 
    facet_wrap(~factor(month.abb[month], levels=month.abb)) + 
    labs(title="Maximum wind speed (measured daily)", 
         y="# Days", 
         x="Maximum wind speed (km/h, rounded to nearest 5 km/h)"
         )

# Mean maximum wind speed by month
dfWind %>%
    filter(year<=2022) %>%
    select(date, month, year, spd, gst) %>%
    pivot_longer(cols=-c(date, month, year)) %>%
    ggplot(aes(x=factor(month.abb[month], levels=month.abb), y=value)) + 
    geom_boxplot(fill="lightblue") + 
    facet_wrap(~c("gst"="2. Maximum wind gust", "spd"="1. Maximum wind speed")[name]) + 
    labs(title="Wind speed measured daily (2010-2022)", y="Wind speed (km/h)", x=NULL) + 
    theme(axis.text.x = element_text(angle = 90)) + 
    lims(y=c(0, NA))

Weather codes, radiation, and evapotranspiration are explored:

dfOther <- tmpOMDaily$tblDaily %>% 
    select(date, wc=weathercode, sw=shortwave_radiation_sum, et=et0_fao_evapotranspiration) %>%
    mutate(wc=factor(wc, levels=sort(unique(wc))), 
           year=lubridate::year(date), 
           month=factor(month.abb[lubridate::month(date)], levels=month.abb), 
           yyyymm=customYYYYMM(date)
           )
dfOther
## # A tibble: 4,914 × 7
##    date       wc       sw    et  year month yyyymm 
##    <date>     <fct> <dbl> <dbl> <dbl> <fct> <chr>  
##  1 2010-01-01 3      6.94  0.53  2010 Jan   2010-01
##  2 2010-01-02 2      7.91  0.49  2010 Jan   2010-01
##  3 2010-01-03 3      5.62  0.46  2010 Jan   2010-01
##  4 2010-01-04 3      5.09  0.48  2010 Jan   2010-01
##  5 2010-01-05 3      6.61  0.52  2010 Jan   2010-01
##  6 2010-01-06 71     7.47  0.48  2010 Jan   2010-01
##  7 2010-01-07 73     3.82  0.29  2010 Jan   2010-01
##  8 2010-01-08 73     6.47  0.53  2010 Jan   2010-01
##  9 2010-01-09 3      6.22  0.38  2010 Jan   2010-01
## 10 2010-01-10 3      8.99  0.35  2010 Jan   2010-01
## # … with 4,904 more rows
# Histogram of weather code
dfOther %>%
    filter(year<=2022) %>%
    ggplot(aes(x=wc)) + 
    geom_bar() + 
    facet_wrap(~month) + 
    labs(title="Weather codes by month (2010-2022)", y="Count", x="Weather code") + 
    theme(axis.text.x = element_text(angle = 90))

# Mean radiation and evapotranspiration by month
dfOther %>%
    select(-year) %>%
    group_by(month) %>%
    summarize(across(where(is.numeric), mean)) %>%
    pivot_longer(cols=-c(month)) %>%
    ggplot(aes(x=month)) + 
    geom_point(aes(y=value)) +
    geom_line(aes(y=value, group=1)) +
    labs(x=NULL, 
         y=NULL, 
         title="Mean radiation and evapotranspiration by month (2010-2022)",
         subtitle="Evapotranspiration (mm) and Radiation (MegaJoules)"
         ) + 
    facet_wrap(~c("et"="Evapotranspiration (mm)", "sw"="Radiation (MJ)")[name], scales="free_y") + 
    lims(y=c(0, NA))

# Boxplot for radiation and evapotranspiration by month
dfOther %>%
    select(date, month, sw, et) %>%
    pivot_longer(-c(date, month)) %>%
    ggplot(aes(x=month)) + 
    geom_boxplot(aes(y=value), fill="lightblue") +
    labs(x=NULL, 
         y=NULL, 
         title="Daily radiation and evapotranspiration (2010-2022)",
         subtitle="Evapotranspiration (mm) and Radiation (MegaJoules)"
         ) + 
    facet_wrap(~c("et"="Evapotranspiration (mm)", "sw"="Radiation (MJ)")[name], scales="free_y") + 
    lims(y=c(0, NA))

The hourly data is tested for file download, cached to avoid multiple hits to the server:

testURLHourly <- helperOpenMeteoURL(cityName="Chicago IL", 
                                    hourlyIndices=1:nrow(tblMetricsHourly),
                                    startDate="2010-01-01", 
                                    endDate="2023-06-15", 
                                    tz="America/Chicago"
                                    )
## 
## Hourly metrics created from indices: temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
testURLHourly
## [1] "https://archive-api.open-meteo.com/v1/archive?latitude=41.84&longitude=-87.68&start_date=2010-01-01&end_date=2023-06-15&hourly=temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,cloudcover_low,cloudcover_mid,cloudcover_high,shortwave_radiation,direct_radiation,direct_normal_irradiance,diffuse_radiation,windspeed_10m,windspeed_100m,winddirection_10m,winddirection_100m,windgusts_10m,et0_fao_evapotranspiration,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm&timezone=America%2FChicago"
# Download file
if(!file.exists("notuse_testOM_hourly.json")) {
    fileDownload(fileName="notuse_testOM_hourly.json", url=testURLHourly)
} else {
    cat("\nFile notuse_testOM_hourly.json already exists, skipping download\n")
}
##                               size isdir mode               mtime
## notuse_testOM_hourly.json 20178300 FALSE  666 2023-06-30 08:03:13
##                                         ctime               atime exe
## notuse_testOM_hourly.json 2023-06-30 08:02:50 2023-06-30 08:03:13  no

Data are read and stored as a list:

tmpOMHourly <- readOpenMeteoJSON("notuse_testOM_hourly.json")
## 
## Objects in JSON include: latitude, longitude, generationtime_ms, utc_offset_seconds, timezone, timezone_abbreviation, elevation, hourly_units, hourly
tmpOMHourly
## $tblDaily
## NULL
## 
## $tblHourly
## # A tibble: 117,936 × 37
##    time                date        hour temper…¹ relat…² dewpo…³ appar…⁴ press…⁵
##    <dttm>              <date>     <int>    <dbl>   <int>   <dbl>   <dbl>   <dbl>
##  1 2010-01-01 00:00:00 2010-01-01     0     -9.5      67   -14.4   -15.8   1024.
##  2 2010-01-01 01:00:00 2010-01-01     1     -9.8      69   -14.4   -16.3   1025.
##  3 2010-01-01 02:00:00 2010-01-01     2    -10.3      73   -14.2   -16.8   1025.
##  4 2010-01-01 03:00:00 2010-01-01     3    -10.8      74   -14.5   -17.2   1026.
##  5 2010-01-01 04:00:00 2010-01-01     4    -11.3      75   -14.8   -17.7   1026.
##  6 2010-01-01 05:00:00 2010-01-01     5    -11.8      76   -15.1   -18.2   1026.
##  7 2010-01-01 06:00:00 2010-01-01     6    -12.3      77   -15.5   -18.6   1027.
##  8 2010-01-01 07:00:00 2010-01-01     7    -12.8      78   -15.8   -19     1028.
##  9 2010-01-01 08:00:00 2010-01-01     8    -13.2      79   -16.1   -19.4   1028.
## 10 2010-01-01 09:00:00 2010-01-01     9    -13.4      78   -16.3   -19.6   1028.
## # … with 117,926 more rows, 29 more variables: surface_pressure <dbl>,
## #   precipitation <dbl>, rain <dbl>, snowfall <dbl>, cloudcover <int>,
## #   cloudcover_low <int>, cloudcover_mid <int>, cloudcover_high <int>,
## #   shortwave_radiation <dbl>, direct_radiation <dbl>,
## #   direct_normal_irradiance <dbl>, diffuse_radiation <dbl>,
## #   windspeed_10m <dbl>, windspeed_100m <dbl>, winddirection_10m <int>,
## #   winddirection_100m <int>, windgusts_10m <dbl>, …
## 
## $tblUnits
## # A tibble: 34 × 4
##    metricType   name                 value   description                        
##    <chr>        <chr>                <chr>   <chr>                              
##  1 hourly_units time                 iso8601 <NA>                               
##  2 hourly_units temperature_2m       deg C   Air temperature at 2 meters above …
##  3 hourly_units relativehumidity_2m  %       Relative humidity at 2 meters abov…
##  4 hourly_units dewpoint_2m          deg C   Dew point temperature at 2 meters …
##  5 hourly_units apparent_temperature deg C   Apparent temperature is the percei…
##  6 hourly_units pressure_msl         hPa     Atmospheric air pressure reduced t…
##  7 hourly_units surface_pressure     hPa     Atmospheric air pressure reduced t…
##  8 hourly_units precipitation        mm      Total precipitation (rain, showers…
##  9 hourly_units rain                 mm      Only liquid precipitation of the p…
## 10 hourly_units snowfall             cm      Snowfall amount of the preceding h…
## # … with 24 more rows
## 
## $tblDescription
## # A tibble: 1 × 7
##   latitude longitude generationtime_ms utc_offset_seco…¹ timez…² timez…³ eleva…⁴
##      <dbl>     <dbl>             <dbl>             <int> <chr>   <chr>     <dbl>
## 1     41.8     -87.7             6370.            -18000 Americ… CDT         180
## # … with abbreviated variable names ¹​utc_offset_seconds, ²​timezone,
## #   ³​timezone_abbreviation, ⁴​elevation
prettyOpenMeteoMeta(tmpOMHourly)
## 
## latitude: 41.8
## longitude: -87.7
## generationtime_ms: 6369.988
## utc_offset_seconds: -18000
## timezone: America/Chicago
## timezone_abbreviation: CDT
## elevation: 180

Consistency of data between daily and hourly is explored:

# Variables where maximum of hourly should be created
vrblMax <- c("weathercode", "temperature_2m", "apparent_temperature", "windspeed_10m", "windgusts_10m")

# Variables where minimum of hourly should be created
vrblMin <- c("temperature_2m", "apparent_temperature")

# Variables where sum of hourly should be created
vrblSum <- c("precipitation", "rain", "snowfall", "shortwave_radiation", "et0_fao_evapotranspiration")

# Variables in daily not to explore
# date, time, sunrise, sunset

# Variables that require a different approach
# winddirection_10m_dominant, precipitation_hours

# Check that all variables are included in hourly data
c(vrblMax, vrblMin, vrblSum) %in% (tmpOMHourly$tblHourly %>% names)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Create daily data from hourly
dfDailyFromHourly <- tmpOMHourly$tblHourly %>%
    group_by(date) %>%
    summarize(across(.cols=all_of(vrblMax), .fns=max, .names="{.col}_max"),
              across(.cols=all_of(vrblMin), .fns=min, .names="{.col}_min"), 
              across(.cols=all_of(vrblSum), .fns=sum, .names="{.col}_sum"), 
              precipitation_hours=sum(precipitation>0)
              ) %>%
    rename(weathercode=weathercode_max, et0_fao_evapotranspiration=et0_fao_evapotranspiration_sum)
dfDailyFromHourly
## # A tibble: 4,914 × 14
##    date       weatherc…¹ tempe…² appar…³ winds…⁴ windg…⁵ tempe…⁶ appar…⁷ preci…⁸
##    <date>          <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 2010-01-01          3    -8.6   -14.9    23.4    38.2   -13.4   -19.6     0  
##  2 2010-01-02          2   -10.4   -17.5    24.3    40.3   -15.1   -21.7     0  
##  3 2010-01-03          3    -7.9   -13.6    21.6    35.6   -13.8   -20.1     0  
##  4 2010-01-04          3    -6.9   -12.8    21      35.3   -12.3   -18.9     0  
##  5 2010-01-05          3    -4.8   -10.1    19.8    33.1    -9.8   -15.7     0  
##  6 2010-01-06         71    -4.9    -9.2    16.4    27      -9     -14.4     0  
##  7 2010-01-07         73    -5.2    -9.3    16.3    29.5    -8.5   -13       7.5
##  8 2010-01-08         73    -3      -9.2    27.6    45      -9.4   -15.3     2.3
##  9 2010-01-09          3    -5.8   -10.8    17.2    28.1   -12.3   -18.2     0  
## 10 2010-01-10          3    -8.8   -16.2    27.9    46.4   -19.4   -25.6     0  
## # … with 4,904 more rows, 5 more variables: rain_sum <dbl>, snowfall_sum <dbl>,
## #   shortwave_radiation_sum <dbl>, et0_fao_evapotranspiration <dbl>,
## #   precipitation_hours <int>, and abbreviated variable names ¹​weathercode,
## #   ²​temperature_2m_max, ³​apparent_temperature_max, ⁴​windspeed_10m_max,
## #   ⁵​windgusts_10m_max, ⁶​temperature_2m_min, ⁷​apparent_temperature_min,
## #   ⁸​precipitation_sum
names(dfDailyFromHourly)
##  [1] "date"                       "weathercode"               
##  [3] "temperature_2m_max"         "apparent_temperature_max"  
##  [5] "windspeed_10m_max"          "windgusts_10m_max"         
##  [7] "temperature_2m_min"         "apparent_temperature_min"  
##  [9] "precipitation_sum"          "rain_sum"                  
## [11] "snowfall_sum"               "shortwave_radiation_sum"   
## [13] "et0_fao_evapotranspiration" "precipitation_hours"
names(dfDailyFromHourly) %in% names(tmpOMDaily$tblDaily)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Check data consistency
for (colName in names(dfDailyFromHourly)) {
    cat("\n", 
        colName, 
        ":", 
        all.equal(dfDailyFromHourly %>% pull(colName), tmpOMDaily$tblDaily %>% pull(colName))
        )
}
## 
##  date : TRUE
##  weathercode : TRUE
##  temperature_2m_max : TRUE
##  apparent_temperature_max : TRUE
##  windspeed_10m_max : TRUE
##  windgusts_10m_max : TRUE
##  temperature_2m_min : TRUE
##  apparent_temperature_min : TRUE
##  precipitation_sum : TRUE
##  rain_sum : TRUE
##  snowfall_sum : TRUE
##  shortwave_radiation_sum : Mean relative difference: 0.9964
##  et0_fao_evapotranspiration : TRUE
##  precipitation_hours : TRUE
# Plot for differences in radiation
dfRadiation <- dfDailyFromHourly %>%
    select(date, shortwave_radiation_sum) %>%
    bind_rows(select(tmpOMDaily$tblDaily, date, shortwave_radiation_sum), .id="src") %>%
    mutate(src=c("1"="Daily from Hourly", "2"="Daily as Reported")[src])
dfRadiation %>%
    ggplot(aes(x=date, y=shortwave_radiation_sum)) + 
    geom_line(aes(group=src, color=src)) + 
    labs(x=NULL, y="Sum of radiation", title="Comparison of shortwave radiation by day by source")

# Exploration of units
tmpOMDaily$tblUnits %>% filter(name=="shortwave_radiation_sum")
## # A tibble: 1 × 4
##   metricType  name                    value description                         
##   <chr>       <chr>                   <chr> <chr>                               
## 1 daily_units shortwave_radiation_sum MJ/m² The sum of solar radiaion on a give…
tmpOMHourly$tblUnits %>% filter(name=="shortwave_radiation")
## # A tibble: 1 × 4
##   metricType   name                value description                            
##   <chr>        <chr>               <chr> <chr>                                  
## 1 hourly_units shortwave_radiation W/m²  Shortwave solar radiation as average o…
# Conversion of Watts per hour to MegaJoules
# 0.0036 megajoules/watt-hour
dfRadiation %>%
    ggplot(aes(x=date, y=ifelse(src=="Daily from Hourly", 0.0036, 1)*shortwave_radiation_sum)) + 
    geom_line(aes(group=src, color=src)) + 
    labs(x=NULL, 
         y="Sum of radiation", 
         title="Comparison of shortwave radiation by day by source", 
         subtitle="Summed from hourly multiplied by 0.0036 to convert Watt-hours to MegaJoules"
         )

dfRadiation %>%
    pivot_wider(id_cols="date", names_from="src", values_from="shortwave_radiation_sum") %>%
    mutate(rat=`Daily as Reported`/`Daily from Hourly`) %>%
    summary()
##       date            Daily from Hourly Daily as Reported      rat          
##  Min.   :2010-01-01   Min.   : 135      Min.   : 0.49     Min.   :0.003585  
##  1st Qu.:2013-05-13   1st Qu.:2304      1st Qu.: 8.29     1st Qu.:0.003599  
##  Median :2016-09-22   Median :4062      Median :14.62     Median :0.003600  
##  Mean   :2016-09-22   Mean   :4190      Mean   :15.08     Mean   :0.003600  
##  3rd Qu.:2020-02-02   3rd Qu.:6056      3rd Qu.:21.80     3rd Qu.:0.003601  
##  Max.   :2023-06-15   Max.   :8788      Max.   :31.64     Max.   :0.003630

With the exception of radiation (reported in different units causing slight rounding differences), the reported daily data matches the expected aggregate of the reported hourly data

Precipitation by hour is explored:

# Hourly precipitation data
dfHourlyPrecip <- tmpOMHourly$tblHourly %>%
    select(time, hour, precipitation, snowfall, rain) %>%
    mutate(year=lubridate::year(time), 
           month=factor(month.abb[lubridate::month(time)], levels=month.abb)
           ) %>%
    pivot_longer(cols=-c(time, year, month, hour))
dfHourlyPrecip
## # A tibble: 353,808 × 6
##    time                 hour  year month name          value
##    <dttm>              <int> <dbl> <fct> <chr>         <dbl>
##  1 2010-01-01 00:00:00     0  2010 Jan   precipitation     0
##  2 2010-01-01 00:00:00     0  2010 Jan   snowfall          0
##  3 2010-01-01 00:00:00     0  2010 Jan   rain              0
##  4 2010-01-01 01:00:00     1  2010 Jan   precipitation     0
##  5 2010-01-01 01:00:00     1  2010 Jan   snowfall          0
##  6 2010-01-01 01:00:00     1  2010 Jan   rain              0
##  7 2010-01-01 02:00:00     2  2010 Jan   precipitation     0
##  8 2010-01-01 02:00:00     2  2010 Jan   snowfall          0
##  9 2010-01-01 02:00:00     2  2010 Jan   rain              0
## 10 2010-01-01 03:00:00     3  2010 Jan   precipitation     0
## # … with 353,798 more rows
# Nil precipitation percent
dfNilPrecip <- dfHourlyPrecip %>%
    group_by(month, name) %>%
    summarize(pctNil=mean(value==0), .groups="drop")
dfNilPrecip
## # A tibble: 36 × 3
##    month name          pctNil
##    <fct> <chr>          <dbl>
##  1 Jan   precipitation  0.845
##  2 Jan   rain           0.925
##  3 Jan   snowfall       0.877
##  4 Feb   precipitation  0.845
##  5 Feb   rain           0.938
##  6 Feb   snowfall       0.865
##  7 Mar   precipitation  0.851
##  8 Mar   rain           0.884
##  9 Mar   snowfall       0.946
## 10 Apr   precipitation  0.807
## # … with 26 more rows
# Graphs of precipitation amount by month
for(metric in unique(dfHourlyPrecip$name)) {
    p1 <- dfHourlyPrecip %>%
        filter(name==metric, value>0, year<=2022) %>%
        ggplot() + 
        geom_histogram(aes(x=value), bins = 50) + 
        facet_wrap(~month) + 
        labs(x=NULL, 
             y=NULL, 
             title=paste0(metric, 
                          ": hourly total (", 
                          tmpOMHourly$tblUnits %>% filter(name==metric) %>% pull(value), 
                          ") from 2010-2022"
                          )
             ) + 
        geom_text(data=dfNilPrecip %>% filter(name==metric), 
                  aes(x=Inf, 
                      y=Inf, 
                      label=paste0("Excludes ", round(100*pctNil, 1), "%\nof observations at 0")
                      ), 
                  size=2.5, 
                  hjust=1, 
                  vjust=1
                  )
    print(p1)
}

Temperature by hour is explored:

# Hourly temperature data
dfHourlyTemp <- tmpOMHourly$tblHourly %>%
    select(time, hour, temperature_2m, apparent_temperature, dewpoint_2m) %>%
    mutate(year=lubridate::year(time), 
           month=factor(month.abb[lubridate::month(time)], levels=month.abb)
           ) %>%
    pivot_longer(cols=-c(time, year, month, hour))
dfHourlyTemp
## # A tibble: 353,808 × 6
##    time                 hour  year month name                 value
##    <dttm>              <int> <dbl> <fct> <chr>                <dbl>
##  1 2010-01-01 00:00:00     0  2010 Jan   temperature_2m        -9.5
##  2 2010-01-01 00:00:00     0  2010 Jan   apparent_temperature -15.8
##  3 2010-01-01 00:00:00     0  2010 Jan   dewpoint_2m          -14.4
##  4 2010-01-01 01:00:00     1  2010 Jan   temperature_2m        -9.8
##  5 2010-01-01 01:00:00     1  2010 Jan   apparent_temperature -16.3
##  6 2010-01-01 01:00:00     1  2010 Jan   dewpoint_2m          -14.4
##  7 2010-01-01 02:00:00     2  2010 Jan   temperature_2m       -10.3
##  8 2010-01-01 02:00:00     2  2010 Jan   apparent_temperature -16.8
##  9 2010-01-01 02:00:00     2  2010 Jan   dewpoint_2m          -14.2
## 10 2010-01-01 03:00:00     3  2010 Jan   temperature_2m       -10.8
## # … with 353,798 more rows
# Graphs of precipitation amount by month
for(metric in unique(dfHourlyTemp$name)) {
    p1 <- dfHourlyTemp %>%
        filter(name==metric, year<=2022) %>%
        ggplot() + 
        geom_boxplot(aes(x=factor(hour), y=value), fill = "lightblue") + 
        facet_wrap(~month) + 
        labs(x=NULL, 
             y=NULL, 
             title=paste0(metric, 
                          ": hourly boxplot (", 
                          tmpOMHourly$tblUnits %>% filter(name==metric) %>% pull(value), 
                          ") from 2010-2022"
                          )
             )
    print(p1)
    
}

# Spread of temperature by day
dfHourlyTemp %>%
    mutate(date=lubridate::date(time)) %>%
    group_by(year, month, date, name) %>%
    summarize(maxValue=max(value), minValue=min(value), mdnValue=median(value), .groups="drop") %>%
    mutate(spd=maxValue-minValue) %>%
    group_by(month, name) %>%
    summarize(across(where(is.numeric), mean), .groups="drop") %>%
    ggplot(aes(x=fct_rev(month), y=spd)) + 
    geom_point() + 
    coord_flip() +
    facet_wrap(~name) + 
    labs(title="Average high/low spread of key metrics by month (deg C)", x=NULL, y="deg C") + 
    lims(y=c(0, NA))

Hours with maximum/minimum temperature and precipitation are explored:

# Create temperature and precipitation data
dfHourlyTempPrecip <- dfHourlyTemp %>%
    bind_rows(dfHourlyPrecip) %>%
    mutate(date=lubridate::date(time)) %>% 
    arrange(time, name) 
dfHourlyTempPrecip
## # A tibble: 707,616 × 7
##    time                 hour  year month name                 value date      
##    <dttm>              <int> <dbl> <fct> <chr>                <dbl> <date>    
##  1 2010-01-01 00:00:00     0  2010 Jan   apparent_temperature -15.8 2010-01-01
##  2 2010-01-01 00:00:00     0  2010 Jan   dewpoint_2m          -14.4 2010-01-01
##  3 2010-01-01 00:00:00     0  2010 Jan   precipitation          0   2010-01-01
##  4 2010-01-01 00:00:00     0  2010 Jan   rain                   0   2010-01-01
##  5 2010-01-01 00:00:00     0  2010 Jan   snowfall               0   2010-01-01
##  6 2010-01-01 00:00:00     0  2010 Jan   temperature_2m        -9.5 2010-01-01
##  7 2010-01-01 01:00:00     1  2010 Jan   apparent_temperature -16.3 2010-01-01
##  8 2010-01-01 01:00:00     1  2010 Jan   dewpoint_2m          -14.4 2010-01-01
##  9 2010-01-01 01:00:00     1  2010 Jan   precipitation          0   2010-01-01
## 10 2010-01-01 01:00:00     1  2010 Jan   rain                   0   2010-01-01
## # … with 707,606 more rows
# Limit to temperature, dewpoint, and precipitation
# Limit precipitation to only days with precipitation > 0
tmpDF <- dfHourlyTempPrecip %>%
    filter(name %in% c("dewpoint_2m", "precipitation", "temperature_2m")) %>%
    group_by(date, name) %>% 
    filter(name!="precipitation" | sum(value)>0) %>%
    mutate(isMax=ifelse(value==max(value), 1, 0), isMin=ifelse(value==min(value), 1, 0)) %>% 
    group_by(name, month, hour) %>% 
    summarize(across(c(isMax, isMin), mean), .groups="drop") %>% 
    pivot_longer(-c(name, month, hour), names_to="metric")
tmpDF
## # A tibble: 1,728 × 5
##    name        month  hour metric  value
##    <chr>       <fct> <int> <chr>   <dbl>
##  1 dewpoint_2m Jan       0 isMax  0.212 
##  2 dewpoint_2m Jan       0 isMin  0.196 
##  3 dewpoint_2m Jan       1 isMax  0.0507
##  4 dewpoint_2m Jan       1 isMin  0.0968
##  5 dewpoint_2m Jan       2 isMax  0.0599
##  6 dewpoint_2m Jan       2 isMin  0.0484
##  7 dewpoint_2m Jan       3 isMax  0.0346
##  8 dewpoint_2m Jan       3 isMin  0.0253
##  9 dewpoint_2m Jan       4 isMax  0.0184
## 10 dewpoint_2m Jan       4 isMin  0.0507
## # … with 1,718 more rows
# Plot max/min for metric
for(keyMetric in unique(tmpDF$name)) {

    p1 <- tmpDF %>% 
        filter(name==keyMetric) %>% 
        ggplot(aes(x=hour, y=value)) + 
        geom_line(aes(color=metric, group=metric)) + 
        facet_wrap(~month) + 
        labs(x="Hour of day", 
             y="% of time as max/min", 
             title=paste0(keyMetric, ": maximum and minimum by hour"), 
             subtitle=paste0("Ties included as full value", 
                             ifelse(keyMetric=="precipitation", " (days with no precipitation excluded)", "")
                             )
             ) + 
        scale_color_discrete("Metric:")
    print(p1)
}

# Plot percent of hours with precipitation
dfHourlyTempPrecip %>% 
    filter(name=="precipitation") %>% 
    group_by(month, hour) %>%
    summarize(pct0=mean(value>0), pct05=mean(value>=0.5), .groups="drop") %>%
    pivot_longer(-c(month, hour)) %>%
    mutate(name=ifelse(name=="pct0", ">=0.1 mm", ">=0.5 mm")) %>%
    ggplot(aes(x=hour, y=value)) + 
    geom_line(aes(group=name, color=name)) + 
    facet_wrap(~month) + 
    labs(x="Hour of day", 
         y="% at/above precipitation hurdle", 
         title=paste0("% of observations with precipitation in past hour")
         ) + 
    lims(y=c(0, NA))

Wind by hour is explored:

# Hourly wind data
dfHourlyWind <- tmpOMHourly$tblHourly %>%
    select(time, hour, windspeed_10m, windgusts_10m, winddirection_10m) %>%
    mutate(year=lubridate::year(time), 
           month=factor(month.abb[lubridate::month(time)], levels=month.abb)
           ) %>%
    pivot_longer(cols=-c(time, year, month, hour))
dfHourlyWind
## # A tibble: 353,808 × 6
##    time                 hour  year month name              value
##    <dttm>              <int> <dbl> <fct> <chr>             <dbl>
##  1 2010-01-01 00:00:00     0  2010 Jan   windspeed_10m      18.7
##  2 2010-01-01 00:00:00     0  2010 Jan   windgusts_10m      33.8
##  3 2010-01-01 00:00:00     0  2010 Jan   winddirection_10m 298  
##  4 2010-01-01 01:00:00     1  2010 Jan   windspeed_10m      20.1
##  5 2010-01-01 01:00:00     1  2010 Jan   windgusts_10m      32.4
##  6 2010-01-01 01:00:00     1  2010 Jan   winddirection_10m 291  
##  7 2010-01-01 02:00:00     2  2010 Jan   windspeed_10m      19.9
##  8 2010-01-01 02:00:00     2  2010 Jan   windgusts_10m      34.2
##  9 2010-01-01 02:00:00     2  2010 Jan   winddirection_10m 290  
## 10 2010-01-01 03:00:00     3  2010 Jan   windspeed_10m      19.5
## # … with 353,798 more rows
# Graphs of wind speed/gust by month
for(metric in setdiff(unique(dfHourlyWind$name), "winddirection_10m")) {
    p1 <- dfHourlyWind %>%
        filter(name==metric, year<=2022) %>%
        ggplot() + 
        geom_boxplot(aes(x=factor(hour), y=value), fill = "lightblue") + 
        facet_wrap(~month) + 
        labs(x=NULL, 
             y=NULL, 
             title=paste0(metric, 
                          ": hourly boxplot (", 
                          tmpOMHourly$tblUnits %>% filter(name==metric) %>% pull(value), 
                          ") from 2010-2022"
                          )
             )
    print(p1)
    
}

# Average wind speed and gust by hour
dfHourlyWind %>%
    filter(!(name %in% c("winddirection_10m")), year<=2022) %>%
    group_by(month, hour, name) %>%
    summarize(across("value", mean), .groups="drop") %>%
    ggplot(aes(x=factor(hour), y=value)) + 
    geom_point(aes(group=name, color=name)) + 
    facet_wrap(~month) + 
    labs(title="Average wind speed and gust (km/h)", x=NULL, y="km/h") + 
    lims(y=c(0, NA))

# Average wind direction by hour
dfHourlyWind %>%
    filter((name %in% c("winddirection_10m")), year<=2022) %>%
    mutate(preDom=case_when(value<45|value>=315~"N", 
                            value<135~"E", 
                            value<225~"S", 
                            value<315~"W", 
                            TRUE~"error"
                            )
           ) %>%
    count(month, hour, preDom) %>%
    ggplot(aes(x=factor(hour), y=n)) + 
    geom_col(aes(fill=factor(preDom, levels=c("N", "W", "S", "E"))), position="fill") + 
    facet_wrap(~month) + 
    labs(title="Distribution of wind direction", x=NULL, y="Wind direction (%)") + 
    scale_fill_discrete("")

Wind by N/S and E/W is also explored:

# Average wind direction by hour
tmpWindDir <- dfHourlyWind %>%
    filter((name %in% c("winddirection_10m")), year<=2022) %>%
    mutate(ew=case_when(value>30&value<150~"E", 
                        value>210&value<=330~"W", 
                        TRUE~"none"
                        ), 
           ns=case_when(value>300|value<=60~"N", 
                        value>120&value<=240~"S", 
                        TRUE~"none"
                        )
    ) 
tmpWindDir
## # A tibble: 113,952 × 8
##    time                 hour  year month name              value ew    ns   
##    <dttm>              <int> <dbl> <fct> <chr>             <dbl> <chr> <chr>
##  1 2010-01-01 00:00:00     0  2010 Jan   winddirection_10m   298 W     none 
##  2 2010-01-01 01:00:00     1  2010 Jan   winddirection_10m   291 W     none 
##  3 2010-01-01 02:00:00     2  2010 Jan   winddirection_10m   290 W     none 
##  4 2010-01-01 03:00:00     3  2010 Jan   winddirection_10m   289 W     none 
##  5 2010-01-01 04:00:00     4  2010 Jan   winddirection_10m   289 W     none 
##  6 2010-01-01 05:00:00     5  2010 Jan   winddirection_10m   288 W     none 
##  7 2010-01-01 06:00:00     6  2010 Jan   winddirection_10m   287 W     none 
##  8 2010-01-01 07:00:00     7  2010 Jan   winddirection_10m   286 W     none 
##  9 2010-01-01 08:00:00     8  2010 Jan   winddirection_10m   285 W     none 
## 10 2010-01-01 09:00:00     9  2010 Jan   winddirection_10m   282 W     none 
## # … with 113,942 more rows
tmpWindDir %>%
    count(month, hour, ew) %>%
    group_by(month, hour) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x=hour, y=pct)) + 
    geom_line(aes(color=factor(ew, levels=c("W", "none", "E")))) + 
    facet_wrap(~month) + 
    labs(title="Wind direction", 
         x="Hour of day", 
         y="% of observations", 
         subtitle="(030-150 deg defined as East, 210-330 deg defined as West)"
         ) + 
    scale_color_discrete("") + 
    lims(y=c(0, NA))

tmpWindDir %>%
    count(month, hour, ns) %>%
    group_by(month, hour) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x=hour, y=pct)) + 
    geom_line(aes(color=factor(ns, levels=c("S", "none", "N")))) + 
    facet_wrap(~month) + 
    labs(title="Wind direction", 
         x="Hour of day", 
         y="% of observations", 
         subtitle="(300-060 deg defined as North, 120-240 deg defined as South)"
         ) + 
    scale_color_discrete("") + 
    lims(y=c(0, NA))

Hourly wind directions are averaged using arctan. The formula is arctan2(y=sum-of-sin, x=sum-of-cos):

# Unweighted by wind speed
tmpWindatan_uw <- tmpWindDir %>%
    mutate(date=lubridate::date(time), cosine=cos(2*pi*value/360), sine=sin(2*pi*value/360)) %>%
    group_by(date) %>%
    summarize(across(c(cosine, sine), sum), .groups="drop") %>%
    mutate(arctangent=atan(sine/cosine), 
           arctangent2=atan2(y=sine, x=cosine), 
           avgdir=((arctangent2/2)*(360/pi)), 
           avgdir=ifelse(avgdir<0, 360+avgdir, avgdir)
           ) %>%
    left_join(select(tmpOMDaily$tblDaily, date, wdd=winddirection_10m_dominant), by="date")
tmpWindatan_uw
## # A tibble: 4,748 × 7
##    date       cosine   sine arctangent arctangent2 avgdir   wdd
##    <date>      <dbl>  <dbl>      <dbl>       <dbl>  <dbl> <int>
##  1 2010-01-01   8.00 -22.2      -1.22       -1.22    290.   291
##  2 2010-01-02  14.8  -18.2      -0.888      -0.888   309.   309
##  3 2010-01-03  16.2  -17.3      -0.817      -0.817   313.   313
##  4 2010-01-04  13.4  -19.8      -0.976      -0.976   304.   304
##  5 2010-01-05  10.9  -21.3      -1.10       -1.10    297.   298
##  6 2010-01-06   1.50 -22.4      -1.50       -1.50    274.   280
##  7 2010-01-07  -9.62  -6.35      0.583      -2.56    213.   240
##  8 2010-01-08  19.6  -11.1      -0.516      -0.516   330.   334
##  9 2010-01-09  13.5  -19.3      -0.962      -0.962   305.   305
## 10 2010-01-10 -13.7  -17.3       0.900      -2.24    232.   226
## # … with 4,738 more rows
tmpWindatan_uw %>%
    mutate(delta=avgdir-wdd) %>%
    summary()
##       date                cosine             sine           arctangent     
##  Min.   :2010-01-01   Min.   :-23.894   Min.   :-23.930   Min.   :-1.5699  
##  1st Qu.:2013-04-01   1st Qu.:-14.778   1st Qu.:-14.142   1st Qu.:-0.5057  
##  Median :2016-07-01   Median : -2.812   Median : -3.161   Median : 0.2924  
##  Mean   :2016-07-01   Mean   : -1.955   Mean   : -2.518   Mean   : 0.1595  
##  3rd Qu.:2019-10-01   3rd Qu.: 10.799   3rd Qu.:  8.453   3rd Qu.: 0.8423  
##  Max.   :2022-12-31   Max.   : 23.900   Max.   : 23.627   Max.   : 1.5704  
##   arctangent2          avgdir              wdd            delta          
##  Min.   :-3.1408   Min.   :  0.1209   Min.   :  0.0   Min.   :-356.6866  
##  1st Qu.:-2.1208   1st Qu.: 92.7324   1st Qu.: 93.0   1st Qu.:  -2.5483  
##  Median :-0.6638   Median :196.5612   Median :198.5   Median :  -0.0048  
##  Mean   :-0.4307   Mean   :180.2702   Mean   :181.2   Mean   :  -0.9128  
##  3rd Qu.: 0.9929   3rd Qu.:255.9577   3rd Qu.:257.0   3rd Qu.:   2.5971  
##  Max.   : 3.1409   Max.   :359.9807   Max.   :360.0   Max.   : 358.5385
tmpWindatan_uw %>%
    count(wdd, awdd=round(avgdir)) %>%
    ggplot(aes(x=wdd, y=awdd)) + 
    geom_point(aes(size=n)) + 
    labs(x="Reported dominant wind direction (daily data)", 
         y="Calculated dominant wind direction (hourly data)", 
         title="Relationship between reported and calculated dominant wind direction", 
         subtitle="Unweighted by wind speed"
         ) + 
    scale_size_continuous("# days")

# Weighted by wind speed
tmpWindatan_wtd <- tmpOMHourly$tblHourly %>%
    select(date, time, wd=winddirection_10m, ws=windspeed_10m) %>%
    mutate(cosine=cos(2*pi*wd/360), sine=sin(2*pi*wd/360)) %>%
    group_by(date) %>%
    summarize(across(c(cosine, sine), .fns=function(x) sum(x*ws)/sum(ws)), sws=sum(ws), .groups="drop") %>%
    mutate(arctangent=atan(sine/cosine), 
           arctangent2=atan2(y=sine, x=cosine), 
           avgdir=((arctangent2/2)*(360/pi)), 
           avgdir=ifelse(avgdir<0, 360+avgdir, avgdir), 
           avgspd=sws/24, 
           dist=sws*sqrt(cosine**2+sine**2)
           ) %>%
    left_join(select(tmpOMDaily$tblDaily, date, wdd=winddirection_10m_dominant), by="date")
tmpWindatan_wtd
## # A tibble: 4,914 × 10
##    date       cosine   sine   sws arctangent arctang…¹ avgdir avgspd  dist   wdd
##    <date>      <dbl>  <dbl> <dbl>      <dbl>     <dbl>  <dbl>  <dbl> <dbl> <int>
##  1 2010-01-01  0.345 -0.917  463      -1.21     -1.21    291.  19.3   454.   291
##  2 2010-01-02  0.620 -0.757  478.     -0.885    -0.885   309.  19.9   468.   309
##  3 2010-01-03  0.675 -0.722  447.     -0.819    -0.819   313.  18.6   441.   313
##  4 2010-01-04  0.561 -0.823  461.     -0.973    -0.973   304.  19.2   459.   304
##  5 2010-01-05  0.461 -0.884  392.     -1.09     -1.09    298.  16.3   391.   298
##  6 2010-01-06  0.169 -0.945  251.     -1.39     -1.39    280.  10.5   241.   280
##  7 2010-01-07 -0.250 -0.426  210       1.04     -2.10    240.   8.75  104.   240
##  8 2010-01-08  0.857 -0.418  471.     -0.453    -0.453   334.  19.6   449.   334
##  9 2010-01-09  0.569 -0.798  359.     -0.951    -0.951   306.  14.9   351.   305
## 10 2010-01-10 -0.643 -0.672  457.      0.808    -2.33    226.  19.0   425.   226
## # … with 4,904 more rows, and abbreviated variable name ¹​arctangent2
tmpWindatan_wtd %>%
    mutate(delta=avgdir-wdd) %>%
    summary()
##       date                cosine              sine              sws       
##  Min.   :2010-01-01   Min.   :-0.99564   Min.   :-0.9971   Min.   : 85.3  
##  1st Qu.:2013-05-13   1st Qu.:-0.64915   1st Qu.:-0.6081   1st Qu.:247.4  
##  Median :2016-09-22   Median :-0.11700   Median :-0.1535   Median :336.6  
##  Mean   :2016-09-22   Mean   :-0.07922   Mean   :-0.1086   Mean   :354.5  
##  3rd Qu.:2020-02-02   3rd Qu.: 0.47777   3rd Qu.: 0.3654   3rd Qu.:436.6  
##  Max.   :2023-06-15   Max.   : 0.99606   Max.   : 0.9855   Max.   :933.4  
##    arctangent       arctangent2          avgdir            avgspd      
##  Min.   :-1.5707   Min.   :-3.1403   Min.   :  0.015   Min.   : 3.554  
##  1st Qu.:-0.4878   1st Qu.:-2.1529   1st Qu.: 91.028   1st Qu.:10.309  
##  Median : 0.3097   Median :-0.7181   Median :198.011   Median :14.023  
##  Mean   : 0.1642   Mean   :-0.4630   Mean   :180.505   Mean   :14.770  
##  3rd Qu.: 0.8461   3rd Qu.: 0.9509   3rd Qu.:257.184   3rd Qu.:18.191  
##  Max.   : 1.5705   Max.   : 3.1402   Max.   :359.957   Max.   :38.892  
##       dist              wdd            delta          
##  Min.   :  3.001   Min.   :  0.0   Min.   :-359.9850  
##  1st Qu.:186.211   1st Qu.: 91.0   1st Qu.:  -0.2522  
##  Median :285.688   Median :198.0   Median :   0.0037  
##  Mean   :301.524   Mean   :180.7   Mean   :  -0.1459  
##  3rd Qu.:399.462   3rd Qu.:257.0   3rd Qu.:   0.2536  
##  Max.   :920.029   Max.   :360.0   Max.   :   1.5144
tmpWindatan_wtd %>%
    count(wdd, awdd=round(avgdir)) %>%
    ggplot(aes(x=wdd, y=awdd)) + 
    geom_point(aes(size=n)) + 
    labs(x="Reported dominant wind direction (daily data)", 
         y="Calculated dominant wind direction (hourly data)", 
         title="Relationship between reported and calculated dominant wind direction", 
         subtitle="Weighted by wind speed"
         ) + 
    scale_size_continuous("# days")

tmpWindatan_wtd %>%
    count(rdist=round(dist), rspd=round(avgspd)) %>%
    ggplot(aes(x=rspd, y=rdist/24)) + 
    geom_point(aes(size=n)) + 
    labs(title="Average wind speed (total and weighted by direction) by day", 
         x="Average wind speed per day", 
         y="Weighted average wind speed\n(total distance on average angle, divided by 24)"
         ) + 
    geom_abline(slope=1, intercept=0, lty=2) +
    scale_size_continuous("# days")

tmpWindatan_wtd %>%
    mutate(rdist=round(dist), rspd=round(avgspd)) %>%
    ggplot(aes(x=rspd)) + 
    geom_boxplot(aes(y=dist/24/rspd, group=rspd), fill="lightblue") + 
    labs(title="Average wind speed (total and weighted by direction) by day", 
         x="Average wind speed per day", 
         y="Average weighted wind speed\n(as ratio of gross average)"
         )

tmpWindatan_wtd %>%
    filter(abs(avgdir-wdd)>1)
## # A tibble: 7 × 10
##   date        cosine       sine   sws arctangent arctang…¹  avgdir avgspd   dist
##   <date>       <dbl>      <dbl> <dbl>      <dbl>     <dbl>   <dbl>  <dbl>  <dbl>
## 1 2011-12-02  0.174   0.0000455 288.    0.000262  0.000262 1.50e-2  12.0   50.0 
## 2 2013-07-31  0.0206  0.0414    182.    1.11      1.11     6.35e+1   7.58   8.41
## 3 2016-04-22  0.918   0.00197   406.    0.00215   0.00215  1.23e-1  16.9  373.  
## 4 2017-07-31 -0.0306 -0.0569     86.6   1.08     -2.06     2.42e+2   3.61   5.59
## 5 2019-07-17  0.0352  0.0545    139.    0.998     0.998    5.72e+1   5.80   9.03
## 6 2020-11-22 -0.108   0.0523    228.   -0.452     2.69     1.54e+2   9.48  27.3 
## 7 2023-03-01  0.0482  0.0180    251.    0.358     0.358    2.05e+1  10.5   12.9 
## # … with 1 more variable: wdd <int>, and abbreviated variable name ¹​arctangent2

The wind direction averaging of hourly data, weighted by wind speed, is consistent with the reported dominant wind direction in the daily data.

Weather codes are explored:

# Hourly weather codes, evapotranspiration, and shortwave
dfHourlyCode <- tmpOMHourly$tblHourly %>%
    select(time, hour, wc=weathercode, et=et0_fao_evapotranspiration, sw=shortwave_radiation) %>%
    mutate(year=lubridate::year(time), 
           month=factor(month.abb[lubridate::month(time)], levels=month.abb)
           ) %>%
    pivot_longer(cols=-c(time, year, month, hour))
dfHourlyCode
## # A tibble: 353,808 × 6
##    time                 hour  year month name  value
##    <dttm>              <int> <dbl> <fct> <chr> <dbl>
##  1 2010-01-01 00:00:00     0  2010 Jan   wc     2   
##  2 2010-01-01 00:00:00     0  2010 Jan   et     0.02
##  3 2010-01-01 00:00:00     0  2010 Jan   sw     0   
##  4 2010-01-01 01:00:00     1  2010 Jan   wc     1   
##  5 2010-01-01 01:00:00     1  2010 Jan   et     0.01
##  6 2010-01-01 01:00:00     1  2010 Jan   sw     0   
##  7 2010-01-01 02:00:00     2  2010 Jan   wc     0   
##  8 2010-01-01 02:00:00     2  2010 Jan   et     0.01
##  9 2010-01-01 02:00:00     2  2010 Jan   sw     0   
## 10 2010-01-01 03:00:00     3  2010 Jan   wc     0   
## # … with 353,798 more rows
# Exploration of weather codes overall
dfHourlyCode %>%
    filter(name=="wc", year<=2022) %>%
    count(value) %>%
    ggplot(aes(x=fct_rev(factor(value)), y=n/1000)) + 
    geom_col(fill="lightblue") + 
    geom_text(aes(label=round(n/1000, 1)), hjust=0, size=3) +
    labs(title="Weather codes in hourly data (2010-2022)", y="Count (000)", x="Weather Code") + 
    coord_flip()

# Exploration of weather codes by month
dfHourlyCode %>%
    filter(name=="wc", year<=2022) %>%
    count(month, value) %>%
    group_by(month) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x=fct_rev(factor(value)), y=pct)) + 
    geom_col(fill="lightblue") + 
    geom_text(aes(label=paste0(round(100*pct, 1), "%")), hjust=0, size=3) +
    labs(title="Weather codes in hourly data (2010-2022)", y="Frequency", x="Weather Code") + 
    coord_flip() + 
    facet_wrap(~month)

# Exploration of weather codes by month
dfHourlyCode %>%
    filter(name=="wc", year<=2022) %>%
    mutate(wType=case_when(value<=3~"Dry", 
                           value>=51 & value<=55~"Drizzle", 
                           value>=61 & value<=65~"Rain", 
                           value>=71 & value<=75~"Snow", 
                           TRUE~"error"
                           )
           ) %>%
    count(month, wType) %>%
    group_by(month) %>%
    mutate(pct=n/sum(n)) %>%
    ungroup() %>%
    ggplot(aes(x=month, y=pct)) + 
    geom_col(aes(fill=factor(wType, levels=c("Dry", "Drizzle", "Rain", "Snow"))), position="stack") + 
    labs(title="Precipitation types in hourly data (2010-2022)", y="Frequency", x=NULL) + 
    coord_flip() + 
    scale_fill_discrete("")

# Weather codes from WMO Code Table 4677 (select examples)
# 00    Cloud development not observed or not observable
# 01    Clouds generally dissolving or becoming less developed
# 02    State of sky on the whole unchanged
# 03    Clouds generally forming or developing
# 51    Drizzle, not freezing, continuous (slight)
# 53    Drizzle, not freezing, continuous (moderate)
# 55    Drizzle, not freezing, continuous (heavy)
# 61    Rain, not freezing, continuous (slight)
# 63    Rain, not freezing, continuous (moderate)
# 65    Rain, not freezing, continuous (heavy)
# 71    Continuous fall of snowflakes (slight)
# 73    Continuous fall of snowflakes (moderate)
# 75    Continuous fall of snowflakes (heavy)

Shortwave radiation is explored:

dfHourlyCode %>%
    filter(name=="sw", year<=2022) %>%
    ggplot(aes(x=factor(hour), y=value)) + 
    geom_boxplot(fill="lightblue") + 
    facet_wrap(~month) + 
    labs(title="Average shortwave solar radiation over the past hour", x=NULL, y="Watts per sqaure meter")

dfHourlyCode %>%
    filter(name=="sw", year<=2022) %>%
    group_by(hour, month) %>%
    summarize(value=mean(value), .groups="drop") %>%
    ggplot(aes(x=factor(hour), y=value)) + 
    geom_point() + 
    facet_wrap(~month) + 
    labs(title="Average hourly shortwave solar radiation by hour and month (2010-2022)", 
         x=NULL, 
         y="Watts per sqaure meter"
         )

dfHourlyCode %>%
    filter(name %in% c("sw"), year<=2022) %>%
    mutate(date=lubridate::date(time)) %>%
    group_by(date, name) %>% 
    mutate(isMax=ifelse(value==max(value), 1, 0), isMin=ifelse(value==min(value), 1, 0)) %>% 
    group_by(name, month, hour) %>% 
    summarize(across(c(isMax, isMin), mean), .groups="drop") %>% 
    pivot_longer(-c(name, month, hour), names_to="metric") %>%
    ggplot(aes(x=hour, y=value)) + 
    geom_line(aes(color=metric, group=metric)) + 
    facet_wrap(~month) + 
    labs(x="Hour of day", 
         y="% of time as max/min", 
         title=paste0("Shortwave radiation", ": maximum and minimum by hour"), 
         subtitle=paste0("Ties included as full value")
         ) + 
    scale_color_discrete("Metric:")

Evapotranspiration is explored:

dfHourlyCode %>%
    filter(name=="et", year<=2022) %>%
    ggplot(aes(x=factor(hour), y=value)) + 
    geom_boxplot(fill="lightblue") + 
    facet_wrap(~month) + 
    labs(title="Evapotranspiration of a well-watered grass field", x="Hour of day", y="mm")

dfHourlyCode %>%
    filter(name=="et", year<=2022) %>%
    group_by(hour, month) %>%
    summarize(value=mean(value), .groups="drop") %>%
    ggplot(aes(x=factor(hour), y=value)) + 
    geom_point() + 
    facet_wrap(~month) + 
    labs(title="Mean evapotranspiration of a well-watered grass field by hour and month (2010-2022)", 
         x="Hour of day", 
         y="mm"
         )

dfHourlyCode %>%
    filter(name %in% c("et"), year<=2022) %>%
    mutate(date=lubridate::date(time)) %>%
    group_by(date, name) %>% 
    mutate(isMax=ifelse(value==max(value), 1, 0), isMin=ifelse(value==min(value), 1, 0)) %>% 
    group_by(name, month, hour) %>% 
    summarize(across(c(isMax, isMin), mean), .groups="drop") %>% 
    pivot_longer(-c(name, month, hour), names_to="metric") %>%
    ggplot(aes(x=hour, y=value)) + 
    geom_line(aes(color=metric, group=metric)) + 
    facet_wrap(~month) + 
    labs(x="Hour of day", 
         y="proportion of time as max/min", 
         title=paste0("Evapotranspiration", ": maximum and minimum by hour"), 
         subtitle=paste0("Ties included as full value")
         ) + 
    scale_color_discrete("Metric:")

Cloud cover is explored:

# Create cloud cover data
dfHourlyCloud <- tmpOMHourly$tblHourly %>% 
    select(time, hour, contains("cloud")) %>% 
    mutate(year=lubridate::year(time),
           month=factor(month.abb[lubridate::month(time)], levels=month.abb)
           ) %>%
    pivot_longer(cols=-c(time, year, month, hour))
dfHourlyCloud
## # A tibble: 471,744 × 6
##    time                 hour  year month name            value
##    <dttm>              <int> <dbl> <fct> <chr>           <int>
##  1 2010-01-01 00:00:00     0  2010 Jan   cloudcover         62
##  2 2010-01-01 00:00:00     0  2010 Jan   cloudcover_low     69
##  3 2010-01-01 00:00:00     0  2010 Jan   cloudcover_mid      0
##  4 2010-01-01 00:00:00     0  2010 Jan   cloudcover_high     0
##  5 2010-01-01 01:00:00     1  2010 Jan   cloudcover         47
##  6 2010-01-01 01:00:00     1  2010 Jan   cloudcover_low     52
##  7 2010-01-01 01:00:00     1  2010 Jan   cloudcover_mid      0
##  8 2010-01-01 01:00:00     1  2010 Jan   cloudcover_high     0
##  9 2010-01-01 02:00:00     2  2010 Jan   cloudcover         20
## 10 2010-01-01 02:00:00     2  2010 Jan   cloudcover_low     22
## # … with 471,734 more rows
# Boxplot for cloud cover types
for(keyMetric in unique(dfHourlyCloud$name)) {

    p1 <- dfHourlyCloud %>% 
        filter(name==keyMetric, year<=2022) %>% 
        ggplot(aes(x=factor(hour), y=value)) + 
        geom_boxplot(fill="lightblue") + 
        facet_wrap(~month) + 
        labs(x="Hour of day", 
             y="% sky covered with cloud", 
             title=paste0(keyMetric, ": % sky covered with cloud")
             )
    print(p1)

}

# Create max/min for metric
tmpDFCloud <- dfHourlyCloud %>%
    filter(year<=2022) %>%
    mutate(date=lubridate::date(time)) %>%
    group_by(date, name) %>% 
    mutate(isMax=ifelse(value==max(value), 1, 0), isMin=ifelse(value==min(value), 1, 0)) %>% 
    group_by(name, month, hour) %>% 
    summarize(across(c(isMax, isMin), mean), .groups="drop") %>% 
    pivot_longer(-c(name, month, hour), names_to="metric")
tmpDFCloud
## # A tibble: 2,304 × 5
##    name       month  hour metric value
##    <chr>      <fct> <int> <chr>  <dbl>
##  1 cloudcover Jan       0 isMax  0.300
##  2 cloudcover Jan       0 isMin  0.208
##  3 cloudcover Jan       1 isMax  0.273
##  4 cloudcover Jan       1 isMin  0.161
##  5 cloudcover Jan       2 isMax  0.266
##  6 cloudcover Jan       2 isMin  0.132
##  7 cloudcover Jan       3 isMax  0.283
##  8 cloudcover Jan       3 isMin  0.141
##  9 cloudcover Jan       4 isMax  0.298
## 10 cloudcover Jan       4 isMin  0.141
## # … with 2,294 more rows
# Plot max/min for metric
for(keyMetric in unique(tmpDFCloud$name)) {

    p1 <- tmpDFCloud %>% 
        filter(name==keyMetric) %>% 
        ggplot(aes(x=hour, y=value)) + 
        geom_line(aes(color=metric, group=metric)) + 
        facet_wrap(~month) + 
        labs(x="Hour of day", 
             y="% of time as max/min", 
             title=paste0(keyMetric, ": maximum and minimum by hour"), 
             subtitle=paste0("Ties included as full value")
             ) + 
        scale_color_discrete("Metric:")
    print(p1)
    
}